Commit 5038a0f6 authored by Dorian's avatar Dorian
Browse files

understanding actors' information

parent 9d293f1f
import json
import requests
# TODO:
# - detect duplicates
# - make the actors' names more precise
# - (?) separate creation comments from later comments
# - start designing the SQL database structures
#   (the goal is not to duplicate their database,
#   but to lay the data out in a way that is easy for us to use)

# Base URL of the API; the endpoint names below are appended to it.
url = 'http://fixmystreet.brussels/api/'
# Endpoint appended to `url` by getPoteletCat() to fetch the categories.
u_cat = 'categories'
# Endpoint appended to `url` to fetch incidents and their page counts.
u_inc = 'incidents'
# Headers sent with every request made in this file.
headers = {'Accept': 'application/hal+json'}
# Sent as the 'startDate' parameter of every incidents request.
originDate = '2000-01-01'
# Id of the top-level category that getPoteletCat() searches in
# (presumably "street furniture" -- confirm against the API).
mobilierurbain_catid = 1007
# Id of the potelet subcategory looked up inside that category.
potelet_catid = 2030
# Sent as the 'size' parameter: number of items asked for per page.
itemsbypages = 12
def getPoteletCat():
    """Fetch the category tree and return the potelet subcategory object.

    Only the top-level category whose id equals ``mobilierurbain_catid`` is
    searched; inside it, the subcategory whose id equals ``potelet_catid``
    is returned (with its own sub-categories).  An empty dict is returned
    when nothing matches.
    """
    response = requests.get(url + u_cat, headers=headers)
    found = {}
    for top_level in response.json()['response']['categories']:
        if top_level['id'] != mobilierurbain_catid:
            continue
        for child in top_level['subCategories']:
            if child['id'] == potelet_catid:
                # deliberately no break: the last matching entry wins
                found = child
    return found
def getNumberOfIncidents(category=''):
    """Return ``(number, pages)`` as reported by the incidents endpoint.

    ``number`` is the reply's ``totalElements`` and ``pages`` its
    ``totalPages`` for a page size of ``itemsbypages``.  When ``category``
    is truthy it is sent as the ``category`` filter; otherwise the figures
    cover the unfiltered request.
    """
    query = {'startDate': originDate, 'size': itemsbypages}
    if category:
        query['category'] = category
    reply = requests.get(url + u_inc, headers=headers, params=query)
    paging = reply.json()['page']
    return paging['totalElements'], paging['totalPages']
def getPotelets():
    """Download every potelet incident, one leaf subcategory at a time.

    The ``category`` request parameter must be a subcategory that has no
    subcategory of its own, so a separate series of paged requests is made
    for each entry of ``potelet_cat['subCategories']`` (the alternative
    would be one global request filtered afterwards; a big ``size`` would
    return everything in a single request).

    Reads the module-level ``potelet_cat``, which must be assigned before
    this function is called.  Progress is printed along the way.
    """
    collected = []
    for leaf in potelet_cat['subCategories']:
        leaf_id = leaf['id']
        total, pages = getNumberOfIncidents(leaf_id)
        print(total, pages, itemsbypages, sep=' / ')
        for page_index in range(pages):
            query = {
                'startDate': originDate,
                'category': leaf_id,
                'size': itemsbypages,
                'page': page_index,
            }
            print('requesting potelets (pages:{}/{}).....'.format(page_index, pages))
            reply = requests.get(url + u_inc, headers=headers, params=query)
            print('got it!')
            collected.extend(reply.json()['_embedded']['response'])
    return collected
def getAttachments(id):
    """Return the attachments (COMMENTs and PICTUREs) of one potelet.

    Args:
        id: Either the incident dict itself, whose
            ``['_links']['attachments']['href']`` is then requested, or an
            incident id.  An id alone carries no URL, so for any non-dict
            value the module-level ``potelet`` (the incident the main loop
            is currently processing) is used instead.

    Returns:
        The list stored under ``'response'`` in the reply, or an empty list
        when the API answers with an empty object.
    """
    # Prefer the incident handed in by the caller; fall back to the global
    # loop variable only for callers that pass a bare id.
    incident = id if isinstance(id, dict) else potelet
    url_attachments = incident['_links']['attachments']['href']
    attachments = requests.get(url_attachments, headers=headers).json()
    # sometimes the reply is just an empty dict: normalise it to a list
    return attachments['response'] if attachments else []
def getHistory(id):
    """Return the history (status changes and acceptations) of one potelet.

    Args:
        id: Either the incident dict itself, whose
            ``['_links']['history']['href']`` is then requested, or an
            incident id.  An id alone carries no URL, so for any non-dict
            value the module-level ``potelet`` (the incident the main loop
            is currently processing) is used instead.

    Returns:
        The list stored under ``'response'`` in the reply, or an empty list
        when the API answers with an empty object.
    """
    # Prefer the incident handed in by the caller; fall back to the global
    # loop variable only for callers that pass a bare id.
    incident = id if isinstance(id, dict) else potelet
    url_history = incident['_links']['history']['href']
    history = requests.get(url_history, headers=headers).json()
    # sometimes the reply is just an empty dict: normalise it to a list
    return history['response'] if history else []
def getActor(story):
    """Describe who is behind a history entry.

    The actor type is read from ``story['information']['actorType']``
    (attachments keep theirs under ``['reporter']`` and are not handled
    here).  The type is decorated with the English names that are present:

    * ``PROFESSIONAL`` -> ``'PROFESSIONAL (<corporation> // <team>)'``
    * ``SYSTEM``       -> ``'SYSTEM (<corporation>)'``
    * anything else    -> the bare actor type

    A missing or empty ``corporation`` / ``team`` entry is skipped instead
    of raising ``KeyError``; when none is available the bare actor type is
    returned.  The entry may contain more contact details that are not used.
    """
    info = story['information']
    actor = info['actorType']
    if actor == 'PROFESSIONAL':
        wanted = ('corporation', 'team')
    elif actor == 'SYSTEM':
        wanted = ('corporation',)
    else:
        return actor
    # keep only the parts the entry actually provides
    names = [info[key]['nameEn'] for key in wanted if info.get(key)]
    if not names:
        return actor
    return actor + ' (' + ' // '.join(names) + ')'
#-------------------------
# Script part: print every potelet with its attachments, then a few
# statistics about the number of pictures per potelet.
# NOTE: this has to stay module-level code (not wrapped in a function):
# getPotelets() reads the global `potelet_cat`, and getAttachments() /
# getHistory() read the global loop variable `potelet`.
print('~!~ POTELETS ~!~')
print('Total number of incidents: ' + str(getNumberOfIncidents()[0]))
potelet_cat = getPoteletCat()
# print(json.dumps(potelet_cat, indent=2))
for potelet_subcat in potelet_cat['subCategories']:
    print(potelet_subcat['nameEn'] + ': ' + str(getNumberOfIncidents(potelet_subcat['id'])[0]))
print('\n')
potelets = getPotelets()
# print(json.dumps(potelets, indent=2))
# img_list[k] is the number of PICTURE attachments of the k-th potelet
img_list = []
for potelet in potelets:
    img_list.append(0)
    # header
    # `potelet_id` rather than `id`, which would shadow the builtin
    potelet_id = potelet['id']
    subcat = potelet['category']['category']['nameEn']
    adress = (potelet['location']['address']['streetNameFr'] + ' ' +
              potelet['location']['address']['streetNumber'] + ', ' +
              potelet['location']['address']['postalCode'])
    creationdate = potelet['creationDate']
    updateddate = potelet['updatedDate']
    print('Potelet id: ' + str(potelet_id))
    print('Status: ' + potelet['status'])
    print('Category: ' + subcat)
    print('Adress: ' + adress)
    # print('creation date: ' + creationdate)
    # print('updated date: ' + updateddate)
    # --> those are in the history!
    # attachments
    attachments = getAttachments(potelet_id)
    # print(json.dumps(attachments, indent=2))
    if attachments:
        print('---[ attachments ]---')
        for attachment in attachments:
            actor = attachment['reporter']['type']
            date = attachment['creationDate']
            if attachment['type'] == 'PICTURE':
                img = attachment['_links']['content']['href']
                img_list[-1] += 1
                print('• ' + date + ' | ' + actor + ': ' + str(img))
            elif attachment['type'] == 'COMMENT':
                comment = attachment['content']
                print('• ' + date + ' | ' + actor + ': ' + comment)
    # history (disabled for now)
    # history = getHistory(potelet_id)
    # # print(json.dumps(attachments, indent=2))
    # if history:
    #     print('---[ history ]---')
    #     for story in history:
    #         actor = getActor(story)
    #         date = story['historyDate']
    #         history_type = story['historyType']
    #         print('• ' + date + ' | ' + actor + ': ' + history_type)
    #         if actor not in ['CITIZEN','PROFESSIONAL','SYSTEM']:
    #             print(str(potelet_id) + ' -----> ' + actor)
    #         else:
    #             print(actor)
    print('\n')
#-------------
print(img_list)
print('Total number of Potelets: ' + str(len(img_list)))
print('Total number of Images: ' + str(sum(img_list)))
if img_list:
    print('Max number of images by potelet: ' + str(max(img_list)))
    print('Average number of images by potelet: ' + str(sum(img_list) / len(img_list)))
else:
    # max() of an empty list and a division by zero would both raise here
    print('No potelets found: no per-potelet image statistics.')
import json
import requests
import math
# TODO:
# - detect duplicates

# Base URL of the API; the endpoint names below are appended to it.
url = 'http://fixmystreet.brussels/api/'
# Endpoint appended to `url` by getPoteletCat() to fetch the categories.
ucat = 'categories'
# Endpoint appended to `url` to fetch incidents and their page counts.
uinc = 'incidents'
# Headers sent with every request made in this file.
headers = {'Accept': 'application/hal+json'}
# Sent as the 'startDate' parameter of every incidents request.
originDate = '2000-01-01'
# Id of the top-level category that getPoteletCat() searches in
# (presumably "street furniture" -- confirm against the API).
mobilierurbain_catid = 1007
# Id of the potelet subcategory looked up inside that category.
potelet_catid = 2030
# Number of items asked for per request (sent as the 'size' parameter);
# it also determines how many pages/requests are needed.
itemsbypages = 12
def getPoteletCat():
    """Return the potelet category JSON object, sub-categories included.

    The categories endpoint is queried once; the subcategory with id
    ``potelet_catid`` is looked up inside the top-level category with id
    ``mobilierurbain_catid``.  Returns ``{}`` when it cannot be found.
    """
    reply = requests.get(url + ucat, headers=headers)
    result = {}
    for category in reply.json()['response']['categories']:
        if category['id'] != mobilierurbain_catid:
            continue
        hits = [sub for sub in category['subCategories']
                if sub['id'] == potelet_catid]
        if hits:
            # the last matching entry wins
            result = hits[-1]
    return result
def getNumberOfIncidents(category=''):
    """Count the incidents of a category (or of all of them).

    Returns a ``(number, pages)`` tuple: the reply's ``totalElements`` and
    the ``totalPages`` needed to fetch them all at ``itemsbypages`` items
    per request.  ``category`` is only sent as a filter when it is truthy.
    """
    query = {'startDate': originDate, 'size': itemsbypages}
    if category:
        query['category'] = category
    paging = requests.get(url + uinc, headers=headers, params=query).json()['page']
    return paging['totalElements'], paging['totalPages']
def getPotelets(number_limit=0):
    """Download the potelet incidents, one leaf subcategory at a time.

    The ``category`` request parameter must be a subcategory that has no
    subcategory of its own, so a separate series of paged requests is made
    for every entry of the module-level ``potelet_cat['subCategories']``
    (``potelet_cat`` must be assigned before calling).

    Args:
        number_limit: When non-zero, only as many pages as are needed to
            hold this many incidents are requested *per subcategory*, and
            never more pages than the API reports.  Whole pages are kept,
            so up to ``itemsbypages - 1`` extra incidents per subcategory
            may be returned.  ``0`` (the default) fetches everything.

    Returns:
        A flat list of incident objects from all the subcategories.
    """
    print('--- Starting to extract potelets ---')
    potelets = []
    for potelet_subcat in potelet_cat['subCategories']:
        number, pages = getNumberOfIncidents(potelet_subcat['id'])
        print('[ ' + potelet_subcat['nameEn'] + ' ] (' + str(number) + ')')
        if number_limit:
            print('requested: ' + str(number_limit))
            # Clamp to the pages that really exist: asking for a page past
            # the last one is wasted work, and such a reply may not carry
            # the '_embedded' key read below.
            pages = min(pages, math.ceil(number_limit / itemsbypages))
        print(str(pages) + ' pages of ' + str(itemsbypages))
        for i in range(pages):
            params = {'startDate': originDate,
                      'category': potelet_subcat['id'],
                      'size': itemsbypages,
                      'page': i}
            print('requesting potelets (pages: ' + str(i + 1) + '/' + str(pages) + ').....')
            r = requests.get(url + uinc, headers=headers, params=params)
            potelets += r.json()['_embedded']['response']
        print('')
    return potelets
def getAttachments(id):
    """Return the attachments (COMMENTs and PICTUREs) of one potelet.

    Args:
        id: Either the incident dict itself, whose
            ``['_links']['attachments']['href']`` is then requested, or an
            incident id.  An id alone carries no URL, so for any non-dict
            value the module-level ``potelet`` (the incident the main loop
            is currently processing) is used instead.

    Returns:
        The list stored under ``'response'`` in the reply, or an empty list
        when the API answers with an empty object.
    """
    # Prefer the incident handed in by the caller; fall back to the global
    # loop variable only for callers that pass a bare id.
    incident = id if isinstance(id, dict) else potelet
    url_attachments = incident['_links']['attachments']['href']
    attachments = requests.get(url_attachments, headers=headers).json()
    # sometimes it's just an empty dict, we transform it into a list...
    return attachments['response'] if attachments else []
def getHistory(id):
    """Return the history (changes of status) of one potelet.

    Args:
        id: Either the incident dict itself, whose
            ``['_links']['history']['href']`` is then requested, or an
            incident id.  An id alone carries no URL, so for any non-dict
            value the module-level ``potelet`` (the incident the main loop
            is currently processing) is used instead.

    Returns:
        The list stored under ``'response'`` in the reply, or an empty list
        when the API answers with an empty object.
    """
    # Prefer the incident handed in by the caller; fall back to the global
    # loop variable only for callers that pass a bare id.
    incident = id if isinstance(id, dict) else potelet
    url_history = incident['_links']['history']['href']
    history = requests.get(url_history, headers=headers).json()
    # an empty reply is an empty dict: hand back a list in every case
    return history['response'] if history else []
def getActorFromList(potelet):
    ''' build the "organisation // department" label of an incident
    from its responsibleOrganisation and responsibleDepartment
    (English names), which are always assigned to an incident '''
    organisation = potelet['responsibleOrganisation']['nameEn']
    department = potelet['responsibleDepartment']['nameEn']
    return ' // '.join((organisation, department))
def getActorFromAttachment(attachment):
    ''' label the author of an attachment from its reporter entry
    - a CITIZEN reporter gives just 'CITIZEN' (no id, no further
      information is ever available for citizens)
    - any other reporter type gives '<corporation> (<type>)': those
      come with an id and always a corporation, never a department '''
    reporter = attachment['reporter']
    kind = reporter['type']
    if kind == 'CITIZEN':
        return kind
    return reporter['corporation']['nameEn'] + ' (' + kind + ')'
def getActorFromHistory(story):
    ''' label the author of a history entry from its information
    - with a corporation: '<corporation> (<type>)', or
      '<corporation> // <team> (<type>)' when a team is given
      (SYSTEM entries come without a department)
    - without a corporation: the bare actor type
    history entries never carry an id and are never CITIZEN '''
    info = story['information']
    kind = info['actorType']
    if 'corporation' not in info:
        return kind
    pieces = [info['corporation']['nameEn']]
    if 'team' in info:
        pieces.append(info['team']['nameEn'])
    return ' // '.join(pieces) + ' (' + kind + ')'
if __name__ == '__main__':
    # Print a limited number of potelets with their responsible actor,
    # attachments and history.
    # NOTE: this has to stay module-level code (not moved into a main()
    # function): getPotelets() reads the global `potelet_cat`, and
    # getAttachments() / getHistory() read the global loop variable
    # `potelet`.
    print('~!~ POTELETS ~!~')
    print('Total number of incidents: ' + str(getNumberOfIncidents()[0]))
    print('')
    potelet_cat = getPoteletCat()
    # print(json.dumps(potelet_cat, indent=2))
    potelets = getPotelets(12)
    # print(json.dumps(potelets, indent=2))
    # placeholder, only referenced by the disabled dump at the bottom
    actors = {}
    # img_list = []
    for potelet in potelets:
        # img_list += [0]
        #--- header
        # `potelet_id` / `history_type` rather than `id` / `type`, which
        # would shadow the builtins for the whole module
        potelet_id = potelet['id']
        status = potelet['status']
        subcat = potelet['category']['category']['nameEn']
        adress = (potelet['location']['address']['streetNameFr'] + ' ' +
                  potelet['location']['address']['streetNumber'] + ', ' +
                  potelet['location']['address']['postalCode'])
        creationdate = potelet['creationDate']
        updateddate = potelet['updatedDate']
        actor = getActorFromList(potelet)
        print('Potelet id: ' + str(potelet_id))
        print('Status: ' + status)
        print('Category: ' + subcat)
        print('Adress: ' + adress)
        print('responsible: ' + actor)
        print('creation date: ' + creationdate)
        print('updated date: ' + updateddate)
        # --> those are in the history!
        #--- attachments
        attachments = getAttachments(potelet_id)
        if attachments:
            print('---[ attachments ]---')
            for attachment in attachments:
                actor = getActorFromAttachment(attachment)
                date = attachment['creationDate']
                if attachment['type'] == 'PICTURE':
                    img = attachment['_links']['content']['href']
                    # img_list[-1] += 1
                    print('• ' + date + ' | ' + actor + ': ' + str(img))
                elif attachment['type'] == 'COMMENT':
                    comment = attachment['content']
                    print('• ' + date + ' | ' + actor + ': ' + comment)
        #--- history
        history = getHistory(potelet_id)
        if history:
            print('---[ history ]---')
            for story in history:
                actor = getActorFromHistory(story)
                date = story['historyDate']
                history_type = story['historyType']
                print('• ' + date + ' | ' + actor + ': ' + history_type)
        print('')
    # #-------------
    # print(json.dumps(actors, indent=2))
    # print(img_list)
    # print( 'Total number of Potelets: ' + str(len(img_list)))
    # print( 'Total number of Images: ' + str(sum(img_list)))
    # print( 'Max number of images by potelet: ' + str(max(img_list)))
    # print( 'Average number of images by potelet: ' + str(sum(img_list) / len(img_list)))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment