Commit 7ed6e965 authored by Dorian's avatar Dorian
Browse files

bug correction: attachments file type

parent 5baa725a
......@@ -10,6 +10,7 @@ If you want to run the script you'll need:
* python3
* sqlite3 (```sudo apt install sqlite3```)
## extracting
**Note:** extracting all the potelets and writing them to the db with the Python script takes approximately 4h.
......@@ -21,8 +22,10 @@ python3 extract.py
To look at db:
* ```sqlite3```
* ```.open potelets.db```
* ```.header on```
* ```.mode column```
* ```select * from potelets;```
* ```select * from attachments where potelet_id=265057;```
* ```select * from history where potelet_id=265057;```
## layout
next step
# where there has been reopening asked
# potelets re-opened case
SELECT potelet_id FROM history WHERE type='INCIDENT_ASKED_REOPENING';
# where there is a lot of attachments
# potelets marked as duplicates or severalOccurrence
SELECT * FROM potelets WHERE duplicates>0;
SELECT * FROM potelets WHERE severalOccurrence>0;
# => pas d'accès a ceux aux autres occurences ou duplicates...
# potelets where there is a lot of attachments
SELECT potelet_id, count(*) AS c
FROM attachments
GROUP BY potelet_id
......@@ -9,18 +15,17 @@ HAVING c > 5;
# then get the attachments
SELECT * FROM attachments WHERE potelet_id=199249;
# where there is a lot of pictures/comment
# potelets where there is a lot of pictures/comment
SELECT potelet_id, count(*) AS c
FROM attachments
WHERE type='COMMENT'
GROUP BY potelet_id
HAVING c > 3;
SELECT potelet_id, count(*) AS c FROM attachments WHERE type='COMMENT' GROUP BY potelet_id HAVING c > 3;
# => got interesting result with this method (the one with a lot of comments)! look at 254080
# the one marked as duplicates
SELECT * FROM potelets WHERE duplicates>0;
# => mais pas d'accès a ceux qui sont les duplicates... (authentification required)
# show all the text from COMMENTS
-- list the text of every COMMENT attachment, grouped together per potelet
-- (SQL orders rows with ORDER BY; SORT BY is a syntax error in sqlite3)
SELECT content
FROM attachments
WHERE type='COMMENT'
ORDER BY potelet_id;
......@@ -19,8 +19,8 @@ mobilierurbain_catid = 1007
potelet_catid = 2030
# ratio for the number of items got by request
itemsbypages = 2
poteletbycategory = 2
itemsbypages = 24
poteletbycategory = None
#---- DB WRITING ----
......@@ -89,7 +89,8 @@ def init_poteletsDB(conn):
potelet_id integer,
date text,
type text,
content text,
text text,
href text,
actor_type text,
organisation text,
department text
......@@ -171,13 +172,19 @@ def addAttachment(conn, attachment):
potelet_id = attachment['incidentId']
date = attachment['creationDate']
type = attachment['type']
content = None;
if type=='PICTURE':
content = attachment['_links']['content']['href']
elif type=='COMMENT' or type=='SYSTEM_COMMENT':
content = attachment['content']
else:
print("Error: attachment of unkown type " + type)
attachment_short = ''
text = None;
href = None;
if 'text' in attachment:
# type = 'COMMENT' or 'SYSTEM_COMMENT' or 'FILE'
text = attachment['text']
attachment_short += text
if 'content' in attachment['_links']:
# type = 'PICTURE' or 'FILE'
href = attachment['_links']['content']['href']
attachment_short += href
# ADD ACTORS
# actor_id, actor_short = addActorFromAttachment(conn, attachment)
......@@ -207,16 +214,16 @@ def addAttachment(conn, attachment):
actor_short += ' (' + actor_type + ')'
# CHECK: other keys than 'corporation' ??? -> no
print('• ' + date + ' | ' + actor_short + ': ' + content)
print('• ' + date + ' | ' + actor_short + ': ' + attachment_short)
# add it to attachments table
# value_list = [id, potelet_id, date, type, content, actor_id]
# sql = ''' INSERT INTO attachments(id,potelet_id,date,type,content,actor_id)
# VALUES(?,?,?,?,?,?) '''
value_list = [id, potelet_id, date, type, content, actor_type, orga_name, depa_name]
sql = ''' INSERT INTO attachments(id,potelet_id,date,type,content,actor_type,organisation,department)
VALUES(?,?,?,?,?,?,?,?) '''
value_list = [id, potelet_id, date, type, text, href, actor_type, orga_name, depa_name]
sql = ''' INSERT INTO attachments(id,potelet_id,date,type,text,href,actor_type,organisation,department)
VALUES(?,?,?,?,?,?,?,?,?) '''
cur = conn.cursor()
cur.execute(sql, value_list)
......
No preview for this file type
......@@ -32,7 +32,6 @@ For every entries in the attachment or history section, there is an **actor**. T
For *PROFESSIONAL* and *SYSTEM*, there are two categories of actors: *organisation* and *department* (**with different ids**)
The departments are always linked to an organisation, giving more precision about who it is.
<!-- in fact organisation and departement must be two different tables.
https://fixmystreet.brussels/api/incidents/267692/ (resp org id = 4)
https://fixmystreet.brussels/api/incidents/266302/ (resp dep id = 4) -->
......@@ -86,7 +85,7 @@ Average number of images by potelet: 1.0595369349503858
### classification
* separate by **categories**: *damaged* and *missing*.
* separate by **by responsible/process** (if there is): *responsible organisation* and/or *responsible departement*, and by status: *CREATED*, *PROCESSING*, *CLOSED*
* separate by **responsible/process** (if there is one): *responsible organisation* and/or *responsible department*, and/or by status: *CREATED*, *PROCESSING*, *CLOSED*
* separate by **location**: *municipality*.
* separate by **type of location**: analysis of the picture and/or location to detect if it's an intersection, a small street, etc.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment