Commit 00e3037d authored by Dorian
Browse files

first post separation & comment/picture ordering optimisation

parent 22d5b50b
......@@ -3,18 +3,10 @@ import numpy as np
from utils.dbInteraction import (
createConnection,
# getPotelets,
getClusters,
# getAttachments,
# getHistory,
# getLastUpdate
)
from utils.formatingUtils import (
municipalityFilter,
# formatingCluster,
# formatingPotelet,
# format_date,
# addGeojsonToClusters
)
MUNICIPALITIES = {
......
......@@ -42,7 +42,7 @@ MUNICIPALITIES = {
}
CLUSTER_THRESHOLD = 4
CODE = 1190
CODE = 1050
DB_PATH = '../data/potelets.db'
GEOJSON_PATH = '../data/potelets_clustered.geojson'
......@@ -58,15 +58,15 @@ if __name__ == '__main__':
print('1. Getting back the potelets from the db ' + DB_PATH)
conn = createConnection(DB_PATH)
# potelets = getPotelets(conn, 100)
clusters = getClusters(conn, CLUSTER_THRESHOLD, True)
clusters = getClusters(conn, CLUSTER_THRESHOLD)
print("-- Data contains", len(clusters), "clusters with more than", CLUSTER_THRESHOLD, "incidents")
date = format_date(getLastUpdate(conn))
volume = {
"name": MUNICIPALITIES[CODE],
"code": CODE,
"threshold": CLUSTER_THRESHOLD,
"date": date
"name": MUNICIPALITIES[CODE],
"code": CODE,
"threshold": CLUSTER_THRESHOLD,
"date": date
}
print('2. Filtering Clusters by municipalities')
......@@ -79,6 +79,9 @@ if __name__ == '__main__':
for cluster in clusters:
cluster = formatingCluster(cluster)
for incident in cluster["incidents"]:
# we merge the tables in order to have one dict like object by potelet
incident["attachments"] = getAttachments(conn, incident)
incident["history"] = getHistory(conn, incident)
incident = formatingPotelet(incident)
t += len(cluster["incidents"])
volume["incident_size"] = t
......
......@@ -98,7 +98,7 @@ def getAttachments(conn, potelet):
""" return the attachments of a potelet by doing a sql request
"""
values = [potelet["id"]]
sql = ''' SELECT * FROM attachments WHERE potelet_id=(?) '''
sql = ''' SELECT * FROM attachments WHERE potelet_id=(?) ORDER BY date ASC'''
cur = conn.cursor()
cur.execute(sql, values)
attachments = [dict(attachment) for attachment in cur.fetchall()]
......@@ -109,14 +109,14 @@ def getHistory(conn, potelet):
""" return the history of a potelet by doing a sql request
"""
values = [potelet["id"]]
sql = ''' SELECT * FROM history WHERE potelet_id=(?) '''
sql = ''' SELECT * FROM history WHERE potelet_id=(?) ORDER BY date ASC'''
cur = conn.cursor()
cur.execute(sql, values)
history = [dict(story) for story in cur.fetchall()]
return history
def getClusters(conn, thres=0, getmore=False):
def getClusters(conn, thres=0):
""" return all clusters from the sqlite db
"""
# get a list of all the cluster id
......@@ -141,12 +141,6 @@ def getClusters(conn, thres=0, getmore=False):
cur.execute(potelets_sql, [id])
potelets = [dict(potelet) for potelet in cur.fetchall()]
if getmore:
for potelet in potelets:
# we merge the tables in order to have one dict like object by potelet
potelet["attachments"] = getAttachments(conn, potelet)
potelet["history"] = getHistory(conn, potelet)
cluster = { "id": id,
"incidents": potelets }
......
......@@ -30,6 +30,39 @@ def format_actor(actor_type, organisation, department):
print("ERROR: empty actor")
return actor
def is_first_post(attachment, first_actor, first_date):
    """Tell whether *attachment* matches the first post's actor and day.

    Args:
        attachment: mapping with an ``"actor"`` entry and a ``"date"`` mapping
            that itself has a ``"day"`` entry.
        first_actor: actor value the attachment's actor is compared to.
        first_date: mapping with a ``"day"`` entry.

    Returns:
        The result of ``actor == first_actor and day == first_date["day"]``.
        The day is only looked up when the actors already match.
    """
    return (
        attachment["actor"] == first_actor
        and attachment["date"]["day"] == first_date["day"]
    )
def sort_attachment(potelet):
    """Reorder a potelet's attachments so comments come before pictures.

    Walks the ``potelet["attachments"]`` list and, whenever a ``"COMMENT"``
    directly follows a ``"PICTURE"`` that has the same ``date["day"]`` and
    the same ``actor``, swaps the two. After a swap the walk steps back one
    position so the comment keeps moving forward past earlier pictures of
    the same day/actor. Attachments of any other type, day or actor are
    never moved.

    The list is modified in place; each swap is traced on stdout.

    Args:
        potelet: dict with an ``"attachments"`` list. Each attachment is a
            mapping with ``"type"``, ``"actor"`` and a ``"date"`` mapping
            holding a ``"day"`` entry. An ``"id"`` entry, if present, is
            used only in the trace message.

    Returns:
        The same list object as ``potelet["attachments"]``, reordered.
    """
    attachments = potelet["attachments"]
    i = 1
    # With fewer than two attachments the loop body never runs.
    while i < len(attachments):
        curr = attachments[i]
        prev = attachments[i - 1]
        misplaced = (
            curr["type"] == "COMMENT"
            and prev["type"] == "PICTURE"
            and curr["date"]["day"] == prev["date"]["day"]
            and curr["actor"] == prev["actor"]
        )
        if misplaced:
            # .get(): a missing id must not abort the reordering just
            # because of the trace message.
            print(potelet.get("id"), "swap!")
            attachments[i - 1], attachments[i] = curr, prev
            # Step back to re-check the moved comment against its new
            # predecessor, but never below the first comparable index.
            i = max(i - 1, 1)
        else:
            i += 1
    return attachments
def formatingPotelet(potelet):
# loads the dumped json
......@@ -53,16 +86,36 @@ def formatingPotelet(potelet):
potelet["actor"] = format_actor("", potelet["responsibleOrganisation"], potelet["responsibleDepartment"])
has_img_or_txt = "empty-content"
if potelet["attachments"] != []:
for attachment in potelet["attachments"]:
attachment["date"] = format_date(attachment["date"])
attachment["actor"] = format_actor(attachment["actor_type"], attachment["organisation"], attachment["department"])
# detect if has content
type = attachment["type"]
if type == "COMMENT" or type == "PICTURE":
has_img_or_txt = "has-content"
potelet["img_or_txt"] = has_img_or_txt
# sort attachment by day
potelet["attachments"] = sort_attachment(potelet)
# detect first post after
first_attachment = potelet["attachments"][0]
first_actor = first_attachment["actor"]
first_date = first_attachment["date"]
first_posts = True;
for attachment in potelet["attachments"]:
# we switch after the last first post
if first_posts and not is_first_post(attachment, first_actor, first_date):
attachment["first"] = "first-after-initial-posts"
first_posts = False;
else:
attachment["first"] = ""
has_img_or_txt = False
for attachment in potelet["attachments"]:
attachment["date"] = format_date(attachment["date"])
attachment["actor"] = format_actor(attachment["actor_type"], attachment["organisation"], attachment["department"])
type = attachment["type"]
if type == "COMMENT" or type == "PICTURE":
has_img_or_txt = True
potelet["img_or_txt"] = has_img_or_txt
for story in potelet["history"]:
story["date"] = format_date(story["date"])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment