Commit 404b04b8 authored by gijs's avatar gijs
Browse files

Start of restructured generation process, where first all models are prepared...

Start of restructured generation process, where first all models are prepared and only later linked.
parent a7691fcc
......@@ -40,6 +40,7 @@ INSTALLED_APPS = [
'django.contrib.staticfiles',
'django.contrib.sites',
'etherpadlite',
'generator'
#'south'
]
......@@ -154,3 +155,4 @@ except NameError:
except ImportError:
pass
API_LOCAL_URL = None
\ No newline at end of file
import re
from collections import OrderedDict
# Global Vars
# Patterns describing the metadata header; taken from the python markdown
# "meta" extension.
META_RE = re.compile(r'^[ ]{0,3}(?P<key>[A-Za-z0-9_-]+):\s*(?P<value>.*)')
META_MORE_RE = re.compile(r'^[ ]{4,}(?P<value>.*)')
BEGIN_RE = re.compile(r'^-{3}(\s.*)?')
END_RE = re.compile(r'^(-{3}|\.{3})(\s.*)?')


def extract_meta (content):
    """
    Extract and parse metadata from the file.

    Taken from the python markdown extension.

    The header is a run of ``key: value`` lines at the very top of
    ``content``, optionally opened by a ``---`` line. A line indented by four
    or more spaces adds another value to the most recent key. The header ends
    at the first blank line, at a ``---`` / ``...`` line (both are consumed),
    or at the first line that is not metadata (which is kept in the body).

    Returns a tuple ``(meta, body)``: ``meta`` is an ``OrderedDict`` mapping
    lower-cased keys to lists of stripped string values, ``body`` is the
    remaining text.
    """
    meta = OrderedDict()
    key = None
    lines = content.split('\n')

    # Walk the header with a cursor instead of popping from the front of the
    # list; the list is only sliced once, at the end.
    pos = 0
    if lines and BEGIN_RE.match(lines[0]):
        pos = 1

    while pos < len(lines):
        line = lines[pos]

        if line.strip() == '' or END_RE.match(line):
            pos += 1  # the terminator itself is not part of the body
            break  # blank line or end of YAML header - done

        m1 = META_RE.match(line)
        if m1:
            key = m1.group('key').lower().strip()
            value = m1.group('value').strip()
            meta.setdefault(key, []).append(value)
        else:
            m2 = META_MORE_RE.match(line)
            if m2 and key:
                # Add another line to existing key
                meta[key].append(m2.group('value').strip())
            else:
                # Not metadata: leave this line in the body.
                break  # no meta data - done

        pos += 1

    return (meta, '\n'.join(lines[pos:]))
......@@ -181,6 +181,7 @@ class MarkdownField(Field):
class InlineMarkdownField(Field):
def parse (self, value):
    """
    Convert ``value`` from markdown to HTML and unwrap ``<p>…</p>`` pairs so
    the result can be used inline. The result is marked safe.
    """
    # A leftover debugging print of every parsed value used to sit here; it
    # has been removed.
    md = markdown.Markdown(extensions=['extra', 'attr_list'])
    return mark_safe(re.sub(r'<p>(.+)</p>', '\\1', md.convert(value)))
......
from generator.models import collectionFor, knownContentTypes, is_link, Model
from generator.utils import try_attributes
def display_link (direction, label, link=None):
arrow = '→' if direction == 'out' else '←'
......@@ -44,9 +45,9 @@ def make_index (models):
if type(val) is list:
for entry in val:
if isinstance(entry, Model):
buff += display_link('out', getattr(entry, entry.labelField), entry.source_path)
buff += display_link('out', try_attributes(entry, [entry.labelField, 'pk']), entry.source_path)
elif isinstance(val, Model):
buff += display_link('out', getattr(val, val.labelField), val.source_path)
buff += display_link('out', try_attributes(val, [val.labelField, 'pk']), val.source_path)
# As the attribute is not in the metadataFields
......@@ -57,9 +58,9 @@ def make_index (models):
if type(val) is list:
for entry in val:
if isinstance(entry, Model):
buff += display_link('in', getattr(entry, entry.labelField), entry.source_path)
buff += display_link('in', try_attributes(entry, [entry.labelField, 'pk']), entry.source_path)
elif isinstance(val, Model):
buff += display_link('in', getattr(val, val.labelField), val.source_path)
buff += display_link('in', try_attributes(val, [val.labelField, 'pk']), val.source_path)
buff += '</li>'
buff += '</ul><style>li { margin-top: 1em; }</style></body></html>'
return buff
......
......@@ -51,6 +51,38 @@ def generate_single_pages (models, template, outputdir, make_context):
for model in models:
output(os.path.join(outputdir, model.prefix, '{}.html'.format(model.key)), template, make_context(model))
def datesorter (obj):
    """
    Sort key: the date of ``obj``.

    Uses ``obj.date.date`` when ``obj.date`` is a ``Date`` and
    ``obj.date.start.date`` when it is a ``DateRange``. Objects without a
    ``date`` attribute, or with a value of another type, sort on
    ``datetime.date(1, 1, 1)``.
    """
    fallback = datetime.date(1,1,1)

    if not hasattr(obj, 'date'):
        return fallback

    value = obj.date
    if isinstance(value, Date):
        return value.date
    if isinstance(value, DateRange):
        return value.start.date

    return fallback
def timesorter (obj):
    """
    Sort key: the time of ``obj``.

    Uses ``obj.time.time`` when ``obj.time`` is a ``Time`` and
    ``obj.time.start`` when it is a ``TimeRange``. Objects without a ``time``
    attribute, or with a value of another type, sort on midnight.
    """
    midnight = datetime.time(0,0)

    if not hasattr(obj, 'time'):
        return midnight

    value = obj.time
    if isinstance(value, Time):
        return value.time
    if isinstance(value, TimeRange):
        # NOTE(review): the date counterpart unwraps ``start.date``; presumably
        # ``TimeRange.start`` already is a plain time value - confirm.
        return value.start

    return midnight
def datetimesorter (obj):
    """
    Sort key: the date and the time of ``obj`` combined into one
    ``datetime.datetime``, using the fallbacks of ``datesorter`` and
    ``timesorter`` for missing parts.
    """
    return datetime.datetime.combine(datesorter(obj), timesorter(obj))
def groupedProgrammeItems(event):
    """
    Return the programme items of ``event`` in chronological order, regrouped
    on their date formatted with ``DATE_OUTPUT_FORMAT``.
    """
    def day_label(item):
        # Grouping key: the formatted date of the item.
        return datesorter(item).strftime(DATE_OUTPUT_FORMAT)

    chronological = sorted(event.programmeItems, key=datetimesorter)
    return regroup(chronological, day_label)
produser_role_sorting = ['artist', 'co-producer', 'other professional', 'team', 'partner']
def generate ():
......@@ -89,7 +121,7 @@ def generate ():
grouped_produsers = sorted(regroup(sorted(produsers.models, key=lambda produser: try_attributes(produser, ['sortname', 'name', 'produser']).lower()), 'role'), key=lambda group: produser_role_sorting.index(group[0]) if group[0] in produser_role_sorting else inf)
output(os.path.join(outputdir, 'produsers.html'), 'produsers.html', { 'produsers': sorted(produsers.models, key=lambda r: str(getattr(r, r.labelField)).lower()), 'grouped_produsers': grouped_produsers })
output(os.path.join(outputdir, 'produsers.html'), 'produsers.html', { 'produsers': sorted(produsers.models, key=lambda r: str(try_attributes(r, ['sortname', 'name', 'produser', 'key'])).lower()), 'grouped_produsers': grouped_produsers })
# output(os.path.join(outputdir, 'produsers.layout.html'), 'produsers.layout.html', { 'produsers': sorted(produsers.models, key=lambda r: str(r.key)), 'grouped_produsers': grouped_produsers })
output(os.path.join(outputdir, 'tags.html'), 'tags.html', { 'tags': sorted(tags.models, key=lambda m: getattr(m, m.labelField)) })
output(os.path.join(outputdir, 'bibliography.html'), 'bibliography.html', { 'bibliography': sorted(bibliography.models, key=lambda m: getattr(m, m.labelField)) })
......@@ -102,42 +134,12 @@ def generate ():
# for event in events.models:
# output(os.path.join(outputdir, event.prefix, '{}.html'.format(event.key)), 'event.html', { 'event': event })
def datesorter (obj):
if hasattr(obj, 'date'):
date = getattr(obj, 'date')
if isinstance(date, Date):
return date.date
elif isinstance(date, DateRange):
return date.start.date
return datetime.date(1,1,1)
def timesorter (obj):
if hasattr(obj, 'time'):
time = getattr(obj, 'time')
if isinstance(time, Time):
return time.time
elif isinstance(time, TimeRange):
return time.start
return datetime.time(0,0)
def datetimesorter (obj):
date = datesorter(obj)
time = timesorter(obj)
return datetime.datetime.combine(date, time)
generate_single_pages(produsers.models, 'produser.html', outputdir, lambda produser: { 'produser': produser })
generate_single_pages(pages.models, 'page.html', outputdir, lambda page: { 'page': page })
generate_single_pages(tags.models, 'tag.html', outputdir, lambda tag: { 'tag': tag })
generate_single_pages(filter(lambda e: not hasattr(e, 'programmeItems') or not e.programmeItems, events.models), 'event.html', outputdir, lambda event: { 'event': event })
def groupedProgrammeItems(event):
programmeItems = sorted(event.programmeItems, key=datetimesorter)
return regroup(programmeItems, lambda e: datesorter(e).strftime(DATE_OUTPUT_FORMAT))
generate_single_pages(filter(lambda e: hasattr(e, 'programmeItems') and e.programmeItems, events.models), 'event-with-programme-items.html', outputdir, lambda event: { 'event': event, 'groupedProgrammeItems': groupedProgrammeItems(event)})
generate_single_pages(notes.models, 'note.html', outputdir, lambda note: { 'note': note })
......
......@@ -42,32 +42,44 @@ class LinkDifferentContentTypeError(Exception):
class Link(object):
def __init__ (self, contentType, reverse=None):
self.contentType = contentType
self.reverse = reverse
def __call__ (self, targetLabel, source):
debug('Link target {}'.format(targetLabel), color=CMAGENTA)
if type(targetLabel) is list:
targetLabel = targetLabel[0]
self.reverse = reverse # Reverse function with the soure object
def __call__ (self, targetLabel):
contentType = self.contentType
reverse = self.reverse
def create(source):
collection = collectionFor(contentType)
target = collection.get(label=targetLabel)
target = collectionFor(self.contentType).get(label=targetLabel)
if self.reverse:
self.reverse(target, source)
return target
if reverse and target:
reverse(target, source)
return target
return create
class MultiLink(Link):
def __call__ (self, targetLabels, source):
def __call__ (self, targetLabels):
debug('Link target keys', targetLabels, color=CMAGENTA)
# Filter out empty string keys
targets = [ collectionFor(self.contentType).get(label=targetLabel) for targetLabel in filter(None, targetLabels) ]
contentType = self.contentType
reverse = self.reverse
def link (source):
collection = collectionFor(contentType)
# filter(None, x) Filters out empty string keys
targets = [ collection.get(label=targetLabel) for targetLabel in filter(None, targetLabels) ]
if self.reverse:
for target in targets:
# Set the property
self.reverse(target, source)
if reverse:
for target in filter(None, targets):
reverse(target, source)
return targets
return targets
# This couls as well be a partian
return link
# This could as well be a partial?
class ReverseLink(object):
def __init__ (self, name):
self.linkName = name
......@@ -119,10 +131,20 @@ def includeQuestion(question, display_label):
def includeExternalProject(project, display_label):
    """
    Render an external project as an anchor. The href is the first attribute
    of ``link`` / ``project`` that ``try_attributes`` yields; the text is
    ``display_label``, or ``project.project`` when no label is given.
    """
    href = try_attributes(project, ['link', 'project'])
    text = display_label or project.project
    return '<a href="{}" class="external-project">{}</a>'.format(href, text)
def includeTag(tag, display_label, model):
    """
    Render a tag as a ``<span class="tag">``. The text is ``display_label``,
    or ``str(tag)`` when no label is given.
    """
    # ``model`` is accepted but not used: the tag is not attached to the
    # model from here.
    text = display_label or str(tag)
    return '<span class="tag">{}</span>'.format(text)
def labelReference(target, display_label):
    """
    Render a reference as a plain ``<span>`` whose class is the content type
    of ``target``. The text is ``display_label``, or ``str(target)`` when no
    label is given.
    """
    css_class = target.contentType
    text = display_label or str(target)
    return '<span class="{}">{}</span>'.format(css_class, text)
def renderReference(target, display_label=None):
def renderReference(target, display_label=None, model=None):
if target.contentType == 'video':
return includeVideo(target, display_label)
elif target.contentType == 'audio':
......@@ -135,6 +157,8 @@ def renderReference(target, display_label=None):
return includeExternalProject(target, display_label)
elif target.contentType == 'bibliography':
return labelReference(target, display_label)
elif target.contentType == 'tag':
return includeTag(target, display_label, model)
else:
return linkReference(target, display_label)
......@@ -168,7 +192,7 @@ def parseReferenceMetadata (raw):
else:
return (None, raw.strip())
def parseReference(match, collector=None):
def parseReference(match, collector=None, model=None):
contentType = match.group(1).strip()
label = match.group(2).strip()
metadata, display_label = parseReferenceMetadata(match.group(3)) if match.group(3) else (None, None)
......@@ -180,19 +204,15 @@ def parseReference(match, collector=None):
# debug('Rendered reference ', renderReference(target))
# Insert the metadata on the object ?
if metadata and target.empty:
target.fill(metadata)
if target:
if metadata and target.stub:
target.fill(metadata)
collector.append(target)
collector.append(target)
# if source and contentType == 'tag' and 'tags' in source.metadataFields:
# debug('Trying to extend tags')
# current = source.tags if hasattr(source, 'tags') else []
# if target not in current:
# source.tags = current + source.metadataFields['tags']([label], source)
# return ''
return renderReference(target, display_label=display_label)
return renderReference(target, display_label=display_label, model=model)
else:
return label
except UnknownContentTypeError:
return match.group(0)
......@@ -265,14 +285,14 @@ def parseShortTimecodes (content):
def expandTags (content):
    """
    Expand the short tag notation: a double-bracketed reference without a
    colon is rewritten into an explicit ``tag`` reference. References that
    already name a content type are left as they are.
    """
    bare_reference = r'\[\[\s*([^:\]]+)\s*\]\]'
    tag_reference = '[[tag: \\1]]'
    return re.sub(bare_reference, tag_reference, content)
def resolveReferences (content, source=None):
def resolveReferences (content, model=None, source=None):
# return content
collector = []
if content:
content = expandTags(content)
content = parseShortTimecodes(content)
content = parseTimecodes(content)
return (mark_safe(re.sub(r'\[\[([\w\._\-]+):([^\|\]]+)(?:\|(.[^\]+]+))?\]\]', partial(parseReference, collector=collector), content)), collector)
return (mark_safe(re.sub(r'\[\[([\w\._\-]+):([^\|\]]+)(?:\|(.[^\]+]+))?\]\]', partial(parseReference, collector=collector, model=model), content)), collector)
# return mark_safe(re.sub(r"\[\[(\w+):(.[^\]]+)\]\]", insertReference, content))
else:
return (content, [])
......@@ -285,7 +305,7 @@ class Model(object):
labelField = 'title'
metadata = {}
def __init__ (self, key=None, label=None, metadata=None, content=None):
def __init__ (self, key=None, label=None, metadata={}, content=None):
debug('Instantiating model of type {}, key: {}, label: {}'.format(self.contentType, key, label))
self.metadata = {}
......@@ -294,14 +314,14 @@ class Model(object):
else:
self.key = self.extractKey(metadata)
if label:
# debug('Setting label, {}, {}'.format(label, self.labelField))
self.__setattr__(self.labelField, [label])
if label and not self.labelField in metadata:
print('Setting label!')
self.__setattr__(self.labelField, label)
if metadata:
self.setMetadata(metadata)
self.empty = True
self.stub = True
if metadata or content:
self.fill(metadata=metadata, content=content)
......@@ -336,10 +356,10 @@ class Model(object):
# Overwrite or extend data. Etc.
def fill(self, metadata={}, content=None, source_path=None):
if metadata:
self.empty = False
self.stub = False
self.setMetadata(metadata)
if content:
self.empty = False
self.stub = False
self.content = content
if source_path:
self.source_path = source_path
......@@ -353,20 +373,23 @@ class Model(object):
elif name == 'source_path':
super().__setattr__('_source_path', value)
elif name in self.metadataFields:
if is_link(self.metadataFields[name]):
# If it is a link we also include, the obj
self.metadata[name] = self.metadataFields[name](value, self)
else:
self.metadata[name] = self.metadataFields[name](value)
self.metadata[name] = self.metadataFields[name](value)
else:
# This might not be the best idea?
self.metadata[name] = value
def resolveLinks(self):
    """
    Resolve the deferred links of this model.

    Every metadata value that is callable is called with this model as its
    only argument and replaced by the result; other values are left alone.
    """
    print('Resolving links')
    # Iterate over a snapshot of the field names: a link callable may write
    # into this model's metadata while it runs (presumably a reverse link
    # whose target is this same model - confirm), and adding a key to a
    # dict during iteration raises a RuntimeError.
    for fieldname in list(self.metadata):
        value = self.metadata[fieldname]
        # Report whether the *value* is a pending link; the field name is a
        # string and never callable.
        print(fieldname, callable(value))
        if callable(value):
            self.metadata[fieldname] = value(self)
def __getattr__ (self, name):
if name in self.metadata:
return self.metadata[name]
else:
print(name)
# super().__getattr__(name)
# debug('Attribute error', name, self.metadata)
raise AttributeError()
......@@ -431,11 +454,8 @@ class Collection(object):
key = keyFilter(label)
if self.has(key):
debug('Found entry for {}'.format(key))
# debug('Found entry for {}'.format(key))
return self.index[key]
elif key:
debug('Could not find entry for {}, instantiating'.format(key))
return self.instantiateStub(key=key, label=label)
else:
return None
......@@ -450,7 +470,7 @@ class Collection(object):
if not self.has(obj.key):
self.models.append(obj)
self.index[obj.key] = obj
elif self.index[obj.key].empty:
elif self.index[obj.key].stub:
debug('Updating metadata for stub {}'.format(obj.key))
self.index[obj.key].setMetadata(obj.meta)
else:
......@@ -461,15 +481,28 @@ class Collection(object):
Instantiate a model for the given key, metadata and content
and register it on the collection.
"""
def instantiate (self, key, metadata=None, content=None):
obj = self.model(key=key, metadata=metadata, content=content)
def instantiate (self, key, label=None, metadata={}, content=None, source_path=''):
obj = self.model(key=key, label=label, metadata=metadata, content=content)
self.register(obj)
return obj
def instantiateStub (self, key, label=None):
    """
    Instantiate a model from only a key and an optional label, register it
    on the collection and return it.
    """
    stub = self.model(key=key, label=label)
    self.register(stub)
    return stub
"""
Instantiates a model if it isn't part of the collection.
Useful for objects like tags or questions
"""
class InstantiatingCollection (Collection):
    def get (self, key = None, label = None):
        """
        Return the model for ``key``, or for the key derived from ``label``
        with ``keyFilter`` when no key is given. A model that is not in the
        index yet is instantiated on the spot.

        Raises ``AttributeError`` when neither a key nor a label is given.
        """
        if not label and not key:
            raise AttributeError('Can not retreive a model without a key or a label.')

        if not key:
            key = keyFilter(label)

        if self.has(key):
            # debug('Found entry for {}'.format(key))
            return self.index[key]

        # The label is passed wrapped in a list. Without a label pass None
        # rather than ``[None]``: a one-element list is truthy and would be
        # taken for a real label.
        return self.instantiate(key=key, label=[label] if label else None)
class Event (Model):
contentType = 'event'
......@@ -497,6 +530,7 @@ class ProgrammeItem (Model):
labelField = 'title'
def link (self):
    """
    Return the link of the first entry in ``self.event`` with the key of
    this item appended as a fragment.
    """
    # A bare ``self.metadata`` expression statement used to precede the
    # return; it had no effect and has been removed.
    return self.event[0].link + '#' + self.key
metadataFields = {
......@@ -685,15 +719,15 @@ contentTypes = {
'trajectory': { 'model': Trajectory, 'collection': Collection(Trajectory) },
'pad': { 'model': Pad, 'collection': Collection(Pad) },
'page': { 'model': Page, 'collection': Collection(Page) },
'tag': { 'model': Tag, 'collection': Collection(Tag) },
'bibliography': { 'model': Bibliography, 'collection': Collection(Bibliography) },
'video': { 'model': Video, 'collection': Collection(Video) },
'audio': { 'model': Audio, 'collection': Collection(Audio) },
'image': { 'model': Image, 'collection': Collection(Image) },
'tag': { 'model': Tag, 'collection': InstantiatingCollection(Tag) },
'bibliography': { 'model': Bibliography, 'collection': InstantiatingCollection(Bibliography) },
'video': { 'model': Video, 'collection': InstantiatingCollection(Video) },
'audio': { 'model': Audio, 'collection': InstantiatingCollection(Audio) },
'image': { 'model': Image, 'collection': InstantiatingCollection(Image) },
'text': { 'model': Text, 'collection': Collection(Text) },
'notes': { 'model': Note, 'collection': Collection(Note) },
'external-project': { 'model': ExternalProject, 'collection': Collection(ExternalProject) },
'question': { 'model': Question, 'collection': Collection(Question) },
'external-project': { 'model': ExternalProject, 'collection': InstantiatingCollection(ExternalProject) },
'question': { 'model': Question, 'collection': InstantiatingCollection(Question) },
}
knownContentTypes = contentTypes.keys()
......
......@@ -16,6 +16,8 @@ from .settings import DEFAULT_CONTENT_TYPE
from ethertoff.settings import PAD_NAMESPACE_SEPARATOR, BASE_DIR, DEBUG
from generator.extract_meta import extract_meta
"""
We loop through all the pads and 'parse' them as markdown.
......@@ -24,13 +26,12 @@ from ethertoff.settings import PAD_NAMESPACE_SEPARATOR, BASE_DIR, DEBUG
From this information a model is constructed. The metadata is further
parsed depending on the field type.
Links will try to look up their targets. If the pad isn't parsed yet a
stub is created to be filled later in the process.
Links will try to look up their targets. If the target pad isn't parsed yet
a stub is created to be filled later in the process.
TODO: decouple metadata parsing and linking. To make sure all data is seen
before linking is performed.
Both keys and labels are used to address models. Depending on the order in
which they are encountered we might create one instance for the label and
another for the key, especially when the label / title is later changed.
......@@ -52,61 +53,59 @@ def parse_pads ():
info('Reading {}'.format(pad.display_slug))
if extension in ['.md', '.markdown']:
source, collectedLinkTargets = resolveReferences(source, source=None)
# source, collectedLinkTargets = resolveReferences(source, source=None)
md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(baselevel=2), 'attr_list'])
content = mark_safe(md.convert(source))
# md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(baselevel=2), 'attr_list'])
# content = mark_safe(md.convert(source))
meta, content = extract_meta(source)
label = None
try:
meta = md.Meta
# meta = md.Meta
meta['pk'] = pad.pk
# if the first line of the metadata is a known contenttype
# use it as such. It's value becomes the key and potetntially
# use it as such. It's value becomes the key and potentially
# the label
firstMetaKey, firstMetaValue = list(meta.items())[0]
if firstMetaKey in knownContentTypes:
contentType = firstMetaKey
key = keyFilter(firstMetaValue)
label = firstMetaValue
if 'type' in meta:
warn('Both valid contenttype present in the first row ({0}) as well as a type declaration ({1}), using {0}'.format(contentType, meta['type'][0]), pad.display_slug)
else:
if 'type' in meta:
if meta['type'] == ['biography']:
warn("Outdated contenttype biography. for pad: {}".format(pad.display_slug))
meta['type'] = ['produser']
contentType = meta['type'][0]
else:
debug("No contenttype found, applied default contenttype for pad: {}".format(pad.display_slug))
contentType = DEFAULT_CONTENT_TYPE
key = modelFor(contentType).extractKey(meta)
collection = collectionFor(contentType)
debug('Extracted key: {}'.format(key))
model = collection.get(key=key)
model = collection.instantiate(key=key, label=label, metadata=meta, content=content, source_path=pad.display_slug)
models.append(model)
if model.empty:
debug('Filling model {}'.format(key))
model.fill(metadata=meta, content=content, source_path=pad.display_slug)
else:
error('Model for key {} already filled'.format(key))
# resolveReferences()
if collectedLinkTargets:
# print('Collected link targets')
for linkTarget in collectedLinkTargets:
# print(linkTarget.contentType, linkTarget)
# if collectedLinkTargets:
# # print('Collected link targets')
# for linkTarget in collectedLinkTargets:
# # print(linkTarget.contentType, linkTarget)
# TODO, simplify linking process
# make references to more than just tags
if linkTarget.contentType == 'tag' and 'tags' in model.metadataFields:
current = model.tags if hasattr(model, 'tags') else []
# # TODO, simplify linking process
# # make references to more than just tags
# if linkTarget.contentType == 'tag' and 'tags' in model.metadataFields:
# current = model.tags if hasattr(model, 'tags') else []
if linkTarget not in current:
model.tags = current + model.metadataFields['tags']([str(linkTarget)], model)
# if linkTarget not in current:
# model.tags = current + model.metadataFields['tags']([str(linkTarget)], model)
except UnknownContentTypeError as e:
debug('Skipped `{}`'.format(name))
......@@ -114,7 +113,16 @@ def parse_pads ():
pass
info('Read {}'.format(pad.display_slug))
# Executing links
for m in models:
# resolve links
# collect inline links
content, _ = resolveReferences(m.content, model=m) # Second return are the collected references
# render markdown