Commit 22f64984 authored by alexandre
Browse files

Changed index code to avoid making http requests

parent 491afc46
......@@ -5,6 +5,25 @@ from django.urls import reverse
from aasniff import AAApp
import markdown
from markdown.extensions.toc import TocExtension
from django.test import Client
import html5lib
from rdflib.plugins.memory import IOMemory
import rdflib
def tidy(string):
    """Parse *string* as HTML5 and return the document serialized as XML.

    The markup is parsed by html5lib into a DOM tree. If the ``<html>``
    element has no ``xmlns`` attribute, the XHTML namespace is added before
    the tree is serialized with ``toxml()``.
    """
    dom_builder = html5lib.treebuilders.getTreeBuilder("dom")
    document = html5lib.HTMLParser(tree=dom_builder).parse(string)

    # FIXME: remove this? We moved to rdflib.
    # Redland crashes if no xmlns attribute is declared.
    # See: http://bugs.librdf.org/mantis/view.php?id=521
    # Let's work around it in the meantime...
    root = document.getElementsByTagName("html")[0]
    has_namespace = root.hasAttribute("xmlns")
    if not has_namespace:
        root.setAttribute("xmlns", "http://www.w3.org/1999/xhtml")

    return document.toxml()
class Conf(object):
......@@ -29,30 +48,27 @@ class Command(BaseCommand):
app = AAApp(conf=Conf)
for pad in Pad.objects.filter():
# TODO: make a Pad method for that
client = pad.epclient
pad_id = pad.padid
text = client.getText(pad_id)['text']
# store = IOMemory()
# graph = rdflib.graph.ConjunctiveGraph(store=store)
for pad in Pad.objects.filter():
c = Client()
path = reverse('pad-read', kwargs={'mode': 'r', 'slug': pad.display_slug})
# FIXME: handle https as well
response = c.get(path)
url = f"http://{domain}{path}"
md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(baselevel=2), 'attr_list'])
text = md.convert(text)
html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<title>Example Document</title>
</head>
<body>
{text}
</body>
</html>"""
print(html)
# print(f"parsing {url}")
# app.graph.parse(data=html, format="rdfa", publicID=url)
print(f"parsing {url}")
if response.status_code == 200:
try:
app.graph.parse(data=tidy(response.content), format="rdfa", publicID=url)
except:
print(f"couldn't parse {url}")
else:
print(f"failed to parse {url}")
# for quad in graph.quads():
# # avoids duplicates statements
# app.graph.remove(quad)
# # adds the new statements
# app.graph.add(quad)
<ul>
{% for author in authors %}
<li>{{ author.0 }}</li>
{% endfor %}
</ul>
from django import template
from aasniff import AAApp
register = template.Library()
class Conf(object):
    """Configuration object passed to ``AAApp`` via ``conf=Conf``."""

    # Presumably the names of the sniffers AAApp should enable -- confirm
    # against the aasniff package.
    SNIFFERS = ['HttpSniffer', 'HtmlSniffer']

    # Presumably selects the backing store (engine and file/database name) --
    # confirm against the aasniff package.
    STORE = dict(ENGINE='sqlite', NAME='aasniff.sqlite')
@register.inclusion_tag('partials/all_authors.html')
def all_authors():
    """Query the graph for every distinct ``dcterms:contributor`` object.

    Builds an ``AAApp`` configured with ``Conf`` and runs a SPARQL
    ``SELECT DISTINCT`` query against its graph.

    Returns:
        dict: context for ``partials/all_authors.html``; the ``"authors"``
        entry is the result object returned by ``app.graph.query``.
    """
    sparql = """
SELECT DISTINCT ?object
WHERE {
?subject <http://purl.org/dc/terms/contributor> ?object.
}
"""
    contributors = AAApp(conf=Conf).graph.query(sparql)
    return {"authors": contributors}
......@@ -596,7 +596,6 @@ def pad_read(request, mode="r", slug=None):
meta_list = list(meta.items())
print(meta_list)
tpl_params = { 'pad' : pad,
'meta' : meta, # to access by hash, like meta.author
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment