Commit 0fddc5ba authored by eric

Generate homepage from pad metadata

`python manage.py index` scrapes the site for RDFa and then generates an index from it.

What’s nice is that this part doesn’t need a database—
we create the graph each time from scratch.

We then convert the RDFa to a form the template can handle, store it as JSON,
and the index view uses this.
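
In miniature, the scraping step amounts to pointing rdflib at a pad URL and letting it pull the embedded metadata into a graph. A minimal sketch (the pad URL is hypothetical; it assumes the dev server is running on 127.0.0.1:8000, as in the management command below):

```python
# Minimal sketch of the scraping step; the pad URL is hypothetical and the
# dev server is assumed to run on 127.0.0.1:8000, as in the command below.
import rdflib

g = rdflib.Graph()
# As in the management command, format detection is left to rdflib,
# which picks up the RDFa embedded in the rendered pad page.
g.parse('http://127.0.0.1:8000/r/Some_Article_FR.md')

for subject, predicate, obj in g:
    print subject, predicate, obj
```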
parent afd91dd5
......@@ -3,6 +3,7 @@
from django.db import models
from django.db.models.signals import pre_delete
from django.contrib.auth.models import User, Group
from django.core.urlresolvers import reverse
from django.utils.translation import ugettext_lazy as _
from py_etherpad import EtherpadLiteClient
......@@ -172,6 +173,9 @@ class Pad(models.Model):
def __unicode__(self):
return dewikify(self.display_slug)
def get_absolute_url(self):
return reverse('relearn.views.pad_read', args=[self.display_slug])
@property
def padid(self):
return "%s$%s" % (self.group.groupID, self.name)
......
......@@ -30,11 +30,6 @@ class Command(BaseCommand):
padID = pad.group.groupID + '$' + urllib.quote_plus(pad.name.replace('::', '_'))
epclient = EtherpadLiteClient(pad.server.apikey, pad.server.apiurl)
# Etherpad gives us authorIDs in the form ['a.5hBzfuNdqX6gQhgz', 'a.tLCCEnNVJ5aXkyVI']
# We link them to the Django users DjangoEtherpadLite created for us
authorIDs = epclient.listAuthorsOfPad(padID)['authorIDs']
authors = PadAuthor.objects.filter(authorID__in=authorIDs)
text = epclient.getText(padID)['text']
backup_file_path = os.path.join(BACKUP_DIR, pad.display_slug)
......
# -*- coding: utf-8 -*-
# Python imports
import os
import codecs
import json
from urllib2 import HTTPError
# PyPI imports
import rdflib
# Django imports
from django.core.management.base import BaseCommand, CommandError
from django.template.loader import render_to_string
# Django Apps import
from etherpadlite.models import Pad, PadAuthor
from relearn.settings import BACKUP_DIR
"""
We scrape all the pages, construct a graph, and ask the RDF store to return all the metadata.
The result will look something like this:
+----------------------------------------------------------------------+--------------------------------------------+---------------------------------------------------+
| subject | predicate | object |
+----------------------------------------------------------------------+--------------------------------------------+---------------------------------------------------+
| http://127.0.0.1:8000/r/06_Elodie_Royer_Yoann_Gourmel_The_Play_FR.md | http://purl.org/dc/terms/creator | Élodie Royer et Yoann Gourmel |
| http://127.0.0.1:8000/r/06_Elodie_Royer_Yoann_Gourmel_The_Play_FR.md | http://purl.org/dc/terms/title | The Play/ザ・プレイ |
| http://127.0.0.1:8000/r/06_Elodie_Royer_Yoann_Gourmel_The_Play_FR.md | http://www.w3.org/ns/md#item | http://www.w3.org/1999/02/22-rdf-syntax-ns#nil |
| http://127.0.0.1:8000/r/06_Elodie_Royer_Yoann_Gourmel_The_Play_FR.md | http://purl.org/dc/terms/created | 2012-06-02T00:00:00 |
| http://127.0.0.1:8000/r/06_Elodie_Royer_Yoann_Gourmel_The_Play_FR.md | http://www.w3.org/1999/xhtml/vocab#license | http://creativecommons.org/licenses/by-nd/3.0/fr/ |
| http://127.0.0.1:8000/r/B_Bernadette-Mayer_Utopia_FR.md | http://purl.org/dc/terms/title | Utopia |
| http://127.0.0.1:8000/r/B_Bernadette-Mayer_Utopia_FR.md | http://purl.org/dc/terms/creator | Bernadette Mayer |
| http://127.0.0.1:8000/r/B_Bernadette-Mayer_Utopia_FR.md | http://www.w3.org/1999/xhtml/vocab#license | |
| http://127.0.0.1:8000/r/B_Bernadette-Mayer_Utopia_FR.md | http://purl.org/dc/terms/created | 2014-10-02T00:00:00 |
| http://127.0.0.1:8000/r/B_Bernadette-Mayer_Utopia_FR.md | http://www.w3.org/ns/md#item | http://www.w3.org/1999/02/22-rdf-syntax-ns#nil |
+----------------------------------------------------------------------+--------------------------------------------+---------------------------------------------------+
We then use Python to construct a list that is easy to use in a template, something like:
[
{
"date": "2012-06-02T00:00:00",
"title": "The Play/ザ・プレイ",
"href": "http://127.0.0.1:8000/r/06_Elodie_Royer_Yoann_Gourmel_The_Play_FR.md",
"license": "http://creativecommons.org/licenses/by-nd/3.0/fr/",
"author": "Élodie Royer et Yoann Gourmel"
},
{
"date": "2014-10-02T00:00:00",
"href": "http://127.0.0.1:8000/r/B_Bernadette-Mayer_Utopia_FR.md",
"author": "Bernadette Mayer",
"license": "",
"title": "Utopia"
}
]
"""
# This SPARQL query simply gets all the metadata as triples
sparql_query = """PREFIX dc: <http://purl.org/dc/elements/1.1/>
select ?subject ?predicate ?object
where { ?subject ?predicate ?object . }
order by ?subject
"""
# Map predicate URIs to shorter names that are easier to use in the template
short_names = {
    "http://www.w3.org/1999/xhtml/vocab#license": "license",
    "http://purl.org/dc/terms/title": "title",
    "http://purl.org/dc/terms/creator": "author",
    "http://purl.org/dc/terms/created": "date",
}
def query_results_to_template_articles(query_results):
    """
    Transform the RDFLib SPARQL query results into the list of dicts the template expects
    """
    template_articles = []
    article = None
    current_uri = None
    for s, p, o in query_results:
        uri = unicode(s).strip()
        key = unicode(p).strip()
        value = unicode(o).strip()
        if uri != current_uri:
            if article:
                template_articles.append(article)
            article = {
                "href": uri
            }
            current_uri = uri
        if key in short_names:
            new_key = short_names[key]
            article[new_key] = value
    if article:
        template_articles.append(article)
    return template_articles
class Command(BaseCommand):
    args = ''
    help = 'Create an index of all the articles’ metadata'

    def handle(self, *args, **options):
        g = rdflib.Graph()
        for pad in Pad.objects.all():
            # We only want to index the articles—
            # for now we can distinguish them because their URLs end in ‘.md’
            if not pad.display_slug.endswith('.md'):
                continue
            try:
                result = g.parse('http://127.0.0.1:8000' + pad.get_absolute_url())
            except HTTPError, e:
                if e.code == 403:
                    # Some of the pads are not public yet—
                    # they give a ‘403 FORBIDDEN’ response.
                    # This is expected, and we don’t need to scrape them
                    continue
                else:
                    raise
        d = query_results_to_template_articles(g.query(sparql_query))
        with open(os.path.join(BACKUP_DIR, "index.json"), 'w') as f:
            f.write(json.dumps(d, indent=2, ensure_ascii=False).encode('utf-8'))
        # with open(os.path.join(BACKUP_DIR, "index.html"), 'w') as f:
        #     f.write(render_to_string("home.html", {"articles": d}).encode('utf-8'))
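
For reference, a hedged usage sketch of `query_results_to_template_articles`: the grouping relies on the rows arriving sorted by subject, which is what the `order by ?subject` clause in the query guarantees. The URIs and values below are hypothetical.

```python
from rdflib import URIRef, Literal

# Hypothetical rows, already ordered by subject as the SPARQL query guarantees
rows = [
    (URIRef('http://127.0.0.1:8000/r/Example_FR.md'),
     URIRef('http://purl.org/dc/terms/title'),
     Literal('An example article')),
    (URIRef('http://127.0.0.1:8000/r/Example_FR.md'),
     URIRef('http://purl.org/dc/terms/creator'),
     Literal('Someone')),
]

print query_results_to_template_articles(rows)
# roughly (key order may differ):
# [{'href': u'http://127.0.0.1:8000/r/Example_FR.md',
#   'title': u'An example article', 'author': u'Someone'}]
```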
......@@ -16,7 +16,7 @@
{% block head_meta %}{% endblock %}
</head>
<body class="pad-{{ pad.display_name|slugify }} {{ mode }}-mode {% if user.is_authenticated and author %}logged-in{% else %}logged-out{% endif %}">
<body class="pad-{{ pad.display_name|slugify }} {{ mode }}-mode {% if user.is_authenticated and author %}logged-in{% else %}logged-out{% endif %} {% block bodyclasses %}{% endblock %}">
<nav>
<h1><a class="button" href="/">&lt;o&gt; future &lt;o&gt;</a></h1>
{% if pad %}
......
{% extends "pad-base.html" %}
{% load wikify relearn_tags %}
{% block bodyclasses %}pad-00md{% endblock %}
{% block title %}
<header>
<h1>
<a href="{% url 'home' %}"><img src="{{ MEDIA_URL }}images/eyes/{{ left_eye }}" alt="{{ site_name }}" /></a>
<span property="dc:title">Index</span>
<a href="{% url 'home' %}"><img src="{{ MEDIA_URL }}images/eyes/{{ right_eye }}" alt="{{ site_name }}" /></a>
</h1>
</header>
{% endblock %}
{% block content %}
<ul>{% for article in articles %}
<li>{{ article.author }} <a href="{{ article.href }}">{{ article.title }}</a></li>
{% endfor %}</ul>
{% endblock %}
{% block rightside %}
{% if author %}
<p>This is the invisible part</p>
{% endif %}
{% endblock %}
{% block extraui %}
{% if not user.is_authenticated %}
<div class="popup-wrapper hidden">
<div class="popup">
<form method="post" action="{% url 'django.contrib.auth.views.login' %}">
{% csrf_token %}
<label for="id_username">Username</label>
<input id="id_username" maxlength="254" name="username" type="text" />
<label for="id_password">Password</label>
<input id="id_password" name="password" type="password" />
<input class="submit" type="submit" value="Login" />
<input type="hidden" name="next" value="{% if next %}{{ next }}{% else %}{% url 'home' %}{% endif %}" />
</form>
</div>
</div>
{% endif %}
{% endblock %}
......@@ -45,6 +45,10 @@ try:
from relearn.settings import HOME_PAD
except ImportError:
HOME_PAD = 'start'
try:
from relearn.settings import BACKUP_DIR
except ImportError:
BACKUP_DIR = None
"""
Set up an HTMLParser for the sole purpose of unescaping
......@@ -364,13 +368,13 @@ def pad_read(request, pk=None, slug=None):
namespaces = [p.rstrip('-') for p in pad.display_slug.split('::')]
meta_list = []
if meta and len(meta.keys()) > 0:
print meta.keys()
# One needs to set a ‘Public’ metadata for the page to be accessible to outside visitors
if not 'public' in meta or not meta['public'][0] or meta['public'][0].lower() in ['false', 'no', 'off', '0']:
if not request.user.is_authenticated():
raise PermissionDenied
# One needs to set a ‘Public’ metadata for the page to be accessible to outside visitors
if not meta or not 'public' in meta or not meta['public'][0] or meta['public'][0].lower() in ['false', 'no', 'off', '0']:
if not request.user.is_authenticated():
raise PermissionDenied
if meta and len(meta.keys()) > 0:
# The human-readable date is parsed so we can sort all the articles
if 'date' in meta:
......@@ -397,10 +401,17 @@ def pad_read(request, pk=None, slug=None):
def home(request):
try:
Pad.objects.get(display_slug=HOME_PAD)
return pad_read(request, slug=HOME_PAD)
except Pad.DoesNotExist:
return HttpResponseRedirect(reverse('login'))
articles = json.load(open(os.path.join(BACKUP_DIR, 'index.json')))
tpl_params = { 'articles': articles }
return render_to_response("home.html", tpl_params, context_instance = RequestContext(request))
except IOError:
try:
Pad.objects.get(display_slug=HOME_PAD)
return pad_read(request, slug=HOME_PAD)
except Pad.DoesNotExist:
return HttpResponseRedirect(reverse('login'))
def css(request):
try:
......
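
The home view now depends on `index.json` being present in `BACKUP_DIR`; until it has been generated, the view falls back to the old `HOME_PAD` behaviour. A sketch of regenerating the index from code rather than the shell (assuming the command is registered as `index`, per `python manage.py index` above):

```python
from django.core.management import call_command

# Rebuild BACKUP_DIR/index.json, e.g. from a deploy hook or a periodic task
call_command('index')
```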