Commit f3cb3f1b authored by alexandre

Started to fix duplicate issues in the store

parent 7efd5698
Pipeline #416 failed with stages
......@@ -90,7 +90,7 @@ class AASniffer(object):
class AAApp(object):
"""
>>> app = AAApp()
>>> app.index('http://kavan.land/statements/zotero/zotero.rdf')
>>> app.index('http://192.168.100.148:8000/r/le-pli.md')
"""
def __init__(self, conf=conf):
self.conf = conf
......@@ -129,29 +129,30 @@ class AAApp(object):
"""
Inspects the resource and stores the information found in the RDF store.
"""
# Creates a temp graph so that sniffers can query previously discovered
# facts
store = IOMemory()
graph = rdflib.graph.ConjunctiveGraph(store=store)
request = requests.get(url, stream=True)
# Indexes the content with the appropriate agents (sniffers)
for sniffer in self.conf.SNIFFERS:
sniffer = registry[sniffer](request=request, model=graph)
# retrieves generated triples or the raw content if already in an RDF format
result = sniffer.sniff() if sniffer.test() else None
# If the sniffer produced some metadata...
if result:
if sniffer.syntax == 'triples':
# The sniffer generated statements; record the statements and where they come from
g = rdflib.graph.Graph(store=store, identifier=rdflib.URIRef(sniffer.ctx))
for statement in result:
g.add(statement)
graph.add(statement + (rdflib.URIRef(sniffer.ctx),))
else:
# The sniffer returned serialized RDF; record the statements and where they come from
g = rdflib.graph.Graph(store=store, identifier=rdflib.URIRef(request.url))
# The sniffer returned a parsable string, such as XML+RDF, HTML+RDFa...
# g.parse(data=result, source=url, format=sniffer.syntax)
# Fixme: find a way to specify the context
g.parse(data=result, format=sniffer.syntax)
## FIXME: fix encoding issue with the line below
# g.parse(data=result, format=sniffer.syntax, publicID=url)
graph.parse(location=url, format=sniffer.syntax)
# FIXME: does not work as expected for non-direct input: it removes all
# the statements associated to a context. Since this context is
......@@ -162,10 +163,12 @@ class AAApp(object):
# self.graph.remove_context(gg)
for quad in graph.quads():
# Adds the new statements
# avoids duplicate statements
self.graph.remove(quad)
# adds the new statements
self.graph.add(quad)
# print(self.graph.serialize(format='nquads'))
# self.graph += graph ## FIXME: removes context
from aasniff.sniffers.http import HttpSniffer
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment