Commit 67609c62 authored by alexandre
Browse files

Merge branch 'Issue2-Use-rdflib-to-store-rdfa-statements' into 'pad-management'

Issue2 use rdflib to store rdfa statements

See merge request !12
parents 5c98b8c1 e240dd98
......@@ -596,7 +596,6 @@ def pad_read(request, mode="r", slug=None):
meta_list = list(meta.items())
print(meta_list)
tpl_params = { 'pad' : pad,
'meta' : meta, # to access by hash, like meta.author
......
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class EthertoffRdfindexConfig(AppConfig):
    """Django application configuration for the RDF index app."""

    # NOTE(review): the project's INSTALLED_APPS entry is spelled
    # 'ethertoff_rdf_index' (extra underscore) -- confirm that this value
    # matches the actual package name.
    name = 'ethertoff_rdfindex'
from django.core.management.base import BaseCommand
from etherpadlite.models import Pad
from django.contrib.sites.models import Site
from django.urls import reverse
from aasniff import AAApp
import markdown
from markdown.extensions.toc import TocExtension
from django.test import Client
import html5lib
from rdflib.plugins.memory import IOMemory
import rdflib
from ... import settings as app_settings
def tidy(string):
    """Parse *string* as HTML with html5lib and return the document as XML.

    The markup is parsed into a DOM tree. If the first ``html`` element has
    no ``xmlns`` attribute, the XHTML namespace is added to it. The result
    of the document's ``toxml()`` is returned.
    """
    dom_builder = html5lib.treebuilders.getTreeBuilder("dom")
    html_parser = html5lib.HTMLParser(tree=dom_builder)
    document = html_parser.parse(string)

    # FIXME: remove this? we moved to rdflib
    # Redland crashes if no xmlns attribute is declared.
    # see: http://bugs.librdf.org/mantis/view.php?id=521
    # Let's fix it in the meantime...
    root = document.getElementsByTagName("html")[0]
    needs_namespace = not root.hasAttribute("xmlns")
    if needs_namespace:
        root.setAttribute("xmlns", "http://www.w3.org/1999/xhtml")

    return document.toxml()
class Conf(object):
    """Configuration object passed to ``AAApp`` by the indexing command."""

    # Sniffer names handed to aasniff; how they are resolved is up to AAApp.
    SNIFFERS = ['HttpSniffer', 'HtmlSniffer']

    # Store settings come from this app's settings module.
    STORE = app_settings.STORE
class Command(BaseCommand):
    """Management command that feeds the RDFa of every pad into the RDF store.

    Each pad's read view is rendered through Django's test client, tidied
    into XML and parsed as RDFa into the ``AAApp`` graph, using the pad's
    public URL as the ``publicID``.
    """

    args = ''
    help = 'Indexes pages'

    def handle(self, *args, **options):
        """Render and index every pad, reporting progress and failures.

        A pad that cannot be fetched or parsed is reported on ``stderr`` and
        skipped; the remaining pads are still processed.
        """
        domain = Site.objects.get_current().domain
        app = AAApp(conf=Conf)

        # NOTE(review): statements stored by earlier runs are not removed
        # before a pad is parsed again -- confirm whether stale statements
        # need to be purged first.
        for pad in Pad.objects.all():
            client = Client()
            path = reverse('pad-read', kwargs={'mode': 'r', 'slug': pad.display_slug})
            url = f"http://{domain}{path}"

            response = client.get(path)
            self.stdout.write(f"parsing {url}")

            if response.status_code != 200:
                # Nothing was parsed here: the page itself could not be fetched.
                self.stderr.write(
                    f"failed to fetch {url} (HTTP {response.status_code})"
                )
                continue

            try:
                app.graph.parse(data=tidy(response.content), format="rdfa", publicID=url)
            except Exception as exc:
                # Best effort: one broken pad must not abort the whole run,
                # but Ctrl-C / SystemExit are no longer swallowed and the
                # cause of the failure is reported.
                self.stderr.write(f"couldn't parse {url}: {exc}")
from django.core.management.base import BaseCommand
from etherpadlite.models import Pad
from django.contrib.sites.models import Site
from django.urls import reverse
from aasniff import AAApp
import rdflib
from ... import settings as app_settings
class Conf(object):
    """Configuration object passed to ``AAApp`` by the print command."""

    # Sniffer names handed to aasniff; how they are resolved is up to AAApp.
    SNIFFERS = ['HttpSniffer', 'HtmlSniffer']

    # Store settings come from this app's settings module.
    STORE = app_settings.STORE
class Command(BaseCommand):
    """Management command that dumps the RDF store.

    It prints every quad in the graph, then every distinct ``dcterms:title``
    value found in it.
    """

    args = ''
    help = 'Print indexed data and texts'

    def handle(self, *args, **options):
        """Write all quads, then all distinct titles, to ``stdout``."""
        app = AAApp(conf=Conf)

        for quad in app.graph.quads():
            self.stdout.write(str(quad))

        # Prefixes made available to the SPARQL query below.
        NS = {
            'bibo': rdflib.Namespace("http://purl.org/ontology/bibo/"),
            'bib': rdflib.Namespace("http://purl.org/net/biblio#"),
            'dc': rdflib.namespace.DC,
            'dcterms': rdflib.namespace.DCTERMS,
            'egr2': rdflib.Namespace("http://rdvocab.info/ElementsGr2/"),
            'foaf': rdflib.namespace.FOAF,
            'frbr': rdflib.Namespace("http://purl.org/vocab/frbr/core#"),
            'purl': rdflib.Namespace("http://purl.org/dc/terms/"),
            'rdf': rdflib.RDF,
            'rdvocab': rdflib.Namespace("http://RDVocab.info/elements/"),
            'stats': rdflib.Namespace("http://kavan.land/vocab/stats#"),
            'vocab': rdflib.Namespace("http://purl.org/vocab/frbr/core#"),
            'wemi': rdflib.Namespace("http://RDVocab.info/RDARelationshipsWEMI/"),
            'z': rdflib.Namespace("http://www.zotero.org/namespaces/export#"),
        }

        # The predicate is spelled out in the query itself. The previous
        # ``initBindings={'predicate': ...}`` bound a variable the query
        # never used, so it has been dropped.
        titled_subjects = app.graph.query("""
            SELECT DISTINCT ?subject ?title
            WHERE {
                ?subject dcterms:title ?title.
            }
            """, initNs=NS)

        for _subject, title in titled_subjects:
            self.stdout.write(str(title))
from django.db import models
# Create your models here.
from django.conf import settings
# OPTION_C = getattr(settings, '_'.join([NAMESPACE, 'OPTION_C']), None)
# if OPTION_C is None:
# raise ImproperlyConfigured('...')
# RDF store settings: taken from the project's ETHERTOFF_RDF_STORE setting
# when it is defined, otherwise the sqlite defaults below are used.
STORE = getattr(
    settings,
    "ETHERTOFF_RDF_STORE",
    dict(ENGINE='sqlite', NAME='aasniff.sqlite'),
)
from django.test import TestCase
# Create your tests here.
from django.shortcuts import render
# Create your views here.
......@@ -33,6 +33,7 @@ ALLOWED_HOSTS = []
INSTALLED_APPS = [
# 'example_theme', ## Add your theme here if any
'ethertoff',
'ethertoff_rdf_index',
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment