Commit 5019e38f authored by Dorian's avatar Dorian
Browse files

extract and write all tables except actors, and try to detect duplicated/reopened

parent b984e520
......@@ -13,11 +13,14 @@ If you want to run the script you'll need:
**Note:** extracting all the potelets and writing them to the db with the Python script takes approximately 4h.
```python3 db/extrac.py```
```
cd db
python3 extract.py
```
To look at db:
* ```sqlite3```
* ```.open db/potelets.db```
* ```.open potelets.db```
* ```.header on```
* ```.mode column```
* ```select * from potelets;```
......
# where a reopening has been asked
SELECT potelet_id FROM history WHERE type='INCIDENT_ASKED_REOPENING';
# where there are a lot of attachments
SELECT potelet_id, count(*) AS c
FROM attachments
GROUP BY potelet_id
HAVING c > 5;
# then get the attachments
SELECT * FROM attachments WHERE potelet_id=199249;
# where there are a lot of pictures/comments
SELECT potelet_id, count(*) AS c
FROM attachments
WHERE type='COMMENT'
GROUP BY potelet_id
HAVING c > 3;
# => got interesting result with this method (the one with a lot of comments)! look at 254080
# the one marked as duplicates
SELECT * FROM potelets WHERE duplicates>0;
# => but no access to the ones that are the duplicates... (authentication required)
......@@ -19,15 +19,21 @@ mobilierurbain_catid = 1007
potelet_catid = 2030
# ratio for the number of items got by request
itemsbypages = 24
poteletbycategory = 48
itemsbypages = 12
poteletbycategory = 100
#---- DB WRITING ----
# TODO:
# actors and actors id in a separate table
# date as date sql field?
# order of iteration/construction for the table?
# - hardcode actors? or two differents tables for actors?
# PROBLEMS:
# - bool as boolean type in table
# - date as date type in table
# - we get to a story with a department that was not yet added to the db
# then we actually have the information for the name but it will not be written
# because we do not have an id to reference
# this could be avoided by running through all the incidents and attachments first, then the stories?
def empty_file(file):
""" delete file if exists and create a new empty one
......@@ -47,10 +53,10 @@ def create_connection(db_file):
conn = None;
try:
conn = sqlite3.connect(db_file)
conn.row_factory = sqlite3.Row
return conn
except Error as e:
print(e)
return conn
def create_table(conn, table_sql):
""" create a table from the table_sql statement
......@@ -71,7 +77,11 @@ def init_poteletsDB(conn):
adress text,
coordinates text,
creationDate text,
updatedDate text
updatedDate text,
duplicates text,
severalOccurrence text,
responsibleOrganisation_id integer,
responsibleDepartment_id integer
); """
attachments_table = """ CREATE TABLE IF NOT EXISTS attachments (
......@@ -79,19 +89,31 @@ def init_poteletsDB(conn):
potelet_id integer,
date text,
type text,
content text
content text,
actor_id integer
); """
history_table = """ CREATE TABLE IF NOT EXISTS history (
id integer PRIMARY KEY,
potelet_id integer,
date text,
type text
type text,
organisation_id integer,
department_id integer
); """
actors_table = """ CREATE TABLE IF NOT EXISTS actors (
id integer PRIMARY KEY,
name text,
type text,
corporation_type text,
contact text
); """
create_table(conn, potelets_table);
create_table(conn, attachments_table);
create_table(conn, history_table);
create_table(conn, actors_table);
def addPotelet(conn, potelet):
# basic data
......@@ -113,20 +135,27 @@ def addPotelet(conn, potelet):
print('updated date: ' + updatedDate)
# others data (not always present)
# duplicates = potelet['duplicates']
duplicates = potelet['duplicates']
severalOccurrence = potelet['severalOccurrence']
# resolved = potelet['declaredResolved']
# occurence = potelet['severalOccurrence']
# third_party = potelet['thirdParty']
# ext_id = potelet['externalId']
# priv_location = potelet['privateLocation']
# ADD ACTORS RESP ORG / DEP
# orga_short, depa_short = addActorFromIncident(conn, potelet)
# responsibleOrganisation_id = potelet['responsibleOrganisation']['id']
# responsibleDepartment_id = potelet['responsibleDepartment']['id']
# print('responsibleOrga.: ' + orga_short)
# print('responsibleDepa.: ' + depa_short)
# add it to potelets table
value_list = [id, status, subcat, adress, coordinates, creationDate, updatedDate]
sql = ''' INSERT INTO potelets(id,status,subcat,adress, coordinates, creationDate,updatedDate)
VALUES(?,?,?,?,?,?,?) '''
# value_list = [id, status, subcat, adress, coordinates, creationDate, updatedDate, responsibleOrganisation_id, responsibleDepartment_id]
# sql = ''' INSERT INTO potelets(id,status,subcat,adress,coordinates,creationDate,updatedDate,responsibleOrganisation_id,responsibleDepartment_id)
# VALUES(?,?,?,?,?,?,?,?,?) '''
value_list = [id, status, subcat, adress, coordinates, creationDate, updatedDate, duplicates, severalOccurrence]
sql = ''' INSERT INTO potelets(id,status,subcat,adress,coordinates,creationDate,updatedDate, duplicates, severalOccurrence)
VALUES(?,?,?,?,?,?,?,?,?) '''
cur = conn.cursor()
cur.execute(sql, value_list)
conn.commit()
......@@ -142,10 +171,16 @@ def addAttachment(conn, attachment):
elif type=='COMMENT' or type=='SYSTEM_COMMENT':
content = attachment['content']
else:
print("ERROR: attachment of unkown type " + type)
print('• ' + date + ' | add actor: ' + content)
print("Error: attachment of unkown type " + type)
# actor_id, actor_short = addActorFromAttachment(conn, attachment)
# print('• ' + date + ' | ' + actor_short + ': ' + content)
print('• ' + date + ' | ' + content)
# add it to attachments table
# value_list = [id, potelet_id, date, type, content, actor_id]
# sql = ''' INSERT INTO attachments(id,potelet_id,date,type,content,actor_id)
# VALUES(?,?,?,?,?,?) '''
value_list = [id, potelet_id, date, type, content]
sql = ''' INSERT INTO attachments(id,potelet_id,date,type,content)
VALUES(?,?,?,?,?) '''
......@@ -159,9 +194,14 @@ def addStory(conn, story):
date = story['historyDate']
type = story['historyType']
print('• ' + date + ' | add actor: ' + ': ' + type)
# orga_id, depa_id = addActorFromHistory(conn, story)
# print('• ' + date + ' | add actor: ' + ': ' + type)
print('• ' + date + ' | ' + type)
# add it to story table
# value_list = [id, potelet_id, date, type, orga_id, depa_id]
# sql = ''' INSERT INTO history(id,potelet_id,date,type,organisation_id,department_id)
# VALUES(?,?,?,?,?,?) '''
value_list = [id, potelet_id, date, type]
sql = ''' INSERT INTO history(id,potelet_id,date,type)
VALUES(?,?,?,?) '''
......@@ -169,6 +209,129 @@ def addStory(conn, story):
cur.execute(sql, value_list)
conn.commit()
def addActor(conn, value_list):
    """Insert an actor into the ``actors`` table unless its id is already stored.

    Args:
        conn: sqlite3 connection to the database holding the ``actors`` table.
        value_list: ``[id, name, type, contact]`` or
            ``[id, name, type, contact, corporation_type]``.

    Raises:
        ValueError: if the actor has to be inserted and ``value_list`` has
            neither 4 nor 5 items.

    When the id is already stored under a different name nothing is written;
    a warning showing the id and both names is printed instead.
    """
    actor_id = value_list[0]
    cur = conn.cursor()
    # Only the stored name is needed. Reading it by position works whether or
    # not a row factory is set on the connection.
    cur.execute(''' SELECT name FROM actors WHERE id=? ''', [actor_id])
    stored = cur.fetchone()

    if stored is None:
        # not in the table yet: add it
        if len(value_list) == 5:
            sql = ''' INSERT INTO actors(id,name,type,contact,corporation_type)
                      VALUES(?,?,?,?,?) '''
        elif len(value_list) == 4:
            sql = ''' INSERT INTO actors(id,name,type,contact)
                      VALUES(?,?,?,?) '''
        else:
            raise ValueError(
                "addActor expects 4 or 5 values, got " + str(len(value_list)))
        cur.execute(sql, value_list)
        conn.commit()
    elif stored[0] != value_list[1]:
        print("Error: two organisations/department with same id and different names")
        # report the actor id itself (not the builtin ``id`` function)
        print("ID: " + str(actor_id))
        print("names: " + str(stored[0]) + " != " + str(value_list[1]))
def addActorFromIncident(conn, potelet):
    """Store the responsible organisation and department of an incident.

    Both records are read from ``potelet['responsibleOrganisation']`` and
    ``potelet['responsibleDepartment']`` and passed to ``addActor``; the
    ``contact`` value is serialised with ``json.dumps``. According to the
    original author's note, both are always assigned when an incident is
    CREATED.

    Returns:
        ``(organisation_name, department_name)`` taken from ``nameEn``.
    """
    # -- organisation (has the extra corporationType field)
    organisation = potelet['responsibleOrganisation']
    organisation_row = [
        organisation['id'],
        organisation['nameEn'],
        organisation['type'],
        json.dumps(organisation['contact']),
        organisation['corporationType'],
    ]
    addActor(conn, organisation_row)

    # -- department
    department = potelet['responsibleDepartment']
    department_row = [
        department['id'],
        department['nameEn'],
        department['type'],
        json.dumps(department['contact']),
    ]
    addActor(conn, department_row)

    return organisation_row[1], department_row[1]
def addActorFromAttachment(conn, attachment):
    """Store the organisation that posted an attachment and identify it.

    The reporter type is read from ``attachment['reporter']['type']``.
    For ``'CITIZEN'`` nothing is stored. For any other type the reporter's
    ``corporation`` record is passed to ``addActor``; no department
    information is read here.

    Returns:
        ``(actor_id, actor_short)``: ``(0, 'CITIZEN')`` for a citizen,
        otherwise the corporation id and ``'<nameEn> (<type>)'``.
    """
    reporter = attachment['reporter']
    reporter_kind = reporter['type']

    # citizens carry no actor id
    if reporter_kind == 'CITIZEN':
        return 0, reporter_kind

    # every other reporter comes with a corporation record
    corporation = reporter['corporation']
    corporation_row = [
        corporation['id'],
        corporation['nameEn'],
        corporation['type'],
        json.dumps(corporation['contact']),
        corporation['corporationType'],
    ]
    addActor(conn, corporation_row)

    label = corporation_row[1] + ' (' + reporter_kind + ')'
    return corporation['id'], label
def addActorFromHistory(conn, story):
    """Resolve the organisation and department ids behind a history entry.

    History entries carry names but no ids, so each name found under
    ``story['information']`` (``corporation`` and ``team``) is looked up in
    the ``actors`` table by its ``nameEn``. Nothing is written to the
    database.

    Returns:
        ``(orga_id, depa_id)``; an element is ``None`` when the entry is made
        by a ``'CITIZEN'``, when the key is absent, or when no actor with
        that name is stored yet.
    """
    details = story['information']
    found = {'corporation': None, 'team': None}

    def stored_id(actor):
        # first actor stored under this English name, if any
        cur = conn.cursor()
        cur.execute(''' SELECT * FROM actors WHERE name=? ''', [actor['nameEn']])
        matches = cur.fetchall()
        return matches[0]['id'] if matches else None

    if details['actorType'] != 'CITIZEN':
        # organisation first, then department
        for key in ('corporation', 'team'):
            if key in details:
                found[key] = stored_id(details[key])

    return found['corporation'], found['team']
#---- API EXTRACTING ----
......@@ -237,51 +400,6 @@ def getHistory(potelet):
history = history['response'] if history else []
return history
def getActorFromList(potelet):
    """Return the ``'<organisation> // <department>'`` label of an incident.

    Both names are the ``nameEn`` of ``potelet['responsibleOrganisation']``
    and ``potelet['responsibleDepartment']``.
    """
    names = (
        potelet['responsibleOrganisation']['nameEn'],
        potelet['responsibleDepartment']['nameEn'],
    )
    return ' // '.join(names)
def getActorFromAttachment(attachment):
    """Return a short label for whoever posted an attachment.

    A ``'CITIZEN'`` reporter yields just its type. Any other reporter type
    yields ``'<corporation nameEn> (<type>)'``; no department is read here.
    """
    reporter = attachment['reporter']
    kind = reporter['type']
    if kind == 'CITIZEN':
        # nothing else to show for a citizen
        return kind
    return reporter['corporation']['nameEn'] + ' (' + kind + ')'
def getActorFromHistory(story):
    """Return a short label for the actor behind a history entry.

    With a ``corporation`` under ``story['information']`` the label is
    ``'<corporation>[ // <team>] (<actorType>)'``, the team part being added
    only when a ``team`` key is present. Without a corporation the label is
    the bare ``actorType``.
    """
    details = story['information']
    kind = details['actorType']
    if 'corporation' not in details:
        return kind

    names = [details['corporation']['nameEn']]
    if 'team' in details:
        names.append(details['team']['nameEn'])
    return ' // '.join(names) + ' (' + kind + ')'
if __name__ == '__main__':
......@@ -322,6 +440,7 @@ if __name__ == '__main__':
for story in history:
addStory(conn, story)
print("(+) potelets added in db")
print('')
conn.close()
......
No preview for this file type
......@@ -21,6 +21,7 @@ Those are like a thread of different actors interacting by posting images or tex
For every potelet there is an **history** thread/section, that contains its changes of status.
The status changes can tell us how much time passed before a responsible party decided to take charge of the incident. The history also contains, as *updates*, the comments that were not part of the creation of the incident.
<!-- lets us find the ones that had updates, transfers, etc. -->
### actors
......@@ -32,6 +33,10 @@ For every entries in the attachment or history section, there is an **actor**. T
For *PROFESSIONAL* and *SYSTEM*, there are two categories of actors: *organisation* and *department* (**with different ids**)
The departments are always linked to an organisation, giving more precision about who it is.
<!-- in fact organisation and department must be two different tables.
https://fixmystreet.brussels/api/incidents/267692/ (resp org id = 4)
https://fixmystreet.brussels/api/incidents/266302/ (resp dep id = 4) -->
An actor is automatically assigned at the creation of an incident (*organisation* and *department*), even before it is marked *ACCEPTED/PROCESSING*.
<!-- There are no visible pointers to how the *department* and *organisation* are linked, but by iterating through the incidents we can create connections. -->
......@@ -85,6 +90,12 @@ Average number of images by potelet: 1.0595369349503858
* separate by **location**: *municipality*.
* separate by **type of location**: analysis of the picture and/or location to detect whether it's an intersection, a small street, etc.
### filtering
* only those marked as **re-opened** or **duplicates**
* only those with lots of **attachments**
* only those at specific **urbanistic contexts**
### ordering
* order by **creation date**
* order by **last updated date**
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment