parse.py 2.82 KB
Newer Older
gijs's avatar
gijs committed
1
2
3
4
5
import markdown
import os.path
import urllib

from .models import modelFor, collectionFor, UnknownContentTypeError
gijs's avatar
gijs committed
6
from .utils import info, debug, error
gijs's avatar
gijs committed
7
8
9
10
11
12
13
14
15
16

from markdown.extensions.toc import TocExtension
from py_etherpad import EtherpadLiteClient

from django.core.management.base import BaseCommand, CommandError
from django.utils.safestring import mark_safe
from etherpadlite.models import Pad

from ethertoff.settings import PAD_NAMESPACE_SEPARATOR, BASE_DIR, DEBUG

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
"""
  
  We loop through all the pads and 'parse' them as markdown.
  This should return both the content and a dictionary for the metadata

  From this information a model is constructed. The metadata is further
  parsed depending on the field type.

  Links will try to look up their targets. If the pad isn't parsed yet a 
  stub is created to be filled later in the process. 

  TODO: decouple metadata parsing and linking, to make sure all data is seen
  before linking is performed.


"""

gijs's avatar
gijs committed
34
35
36
37
38
39
40
41
42
43
44
def _etherpad_id(pad):
  """Return the identifier used to request `pad` from the Etherpad API.

  Public pads are addressed by their `publicpadid`. Any other pad is
  addressed by its group id and its URL-quoted name, joined with '$', where
  every PAD_NAMESPACE_SEPARATOR in the name is first replaced by '_'.
  """
  # Imported explicitly: a bare `import urllib` does not guarantee that the
  # `urllib.parse` submodule has been loaded.
  from urllib.parse import quote

  if pad.is_public:
    return pad.publicpadid

  return pad.group.groupID + '$' + quote(pad.name.replace(PAD_NAMESPACE_SEPARATOR, '_'))


def parse_pads():
  """Read all pads and fill a model for every markdown pad.

  Pads whose display slug ends in '.md' or '.markdown' are fetched from
  Etherpad and converted with Python-Markdown. The metadata found by the
  'meta' extension is completed with the pad's primary key and a default
  type of 'pad'; the type 'biography' is stored as 'produser'. The model
  looked up under the extracted key is filled when it is still empty,
  otherwise an error is logged.

  Pads with any other extension are only logged. Pads whose type raises
  UnknownContentTypeError are skipped with a debug message.
  """
  epclient = None

  for pad in Pad.objects.all():
    if epclient is None:
      # NOTE(review): the client is built from the first pad's server and then
      # reused for every pad -- confirm all pads live on the same server.
      epclient = EtherpadLiteClient(pad.server.apikey, pad.server.apiurl)

    name, extension = os.path.splitext(pad.display_slug)

    info('Reading {}'.format(pad.display_slug))

    if extension in ('.md', '.markdown'):
      # Only markdown pads are parsed, so only those are fetched from the API.
      source = epclient.getText(_etherpad_id(pad))['text']

      md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(baselevel=2), 'attr_list'])
      content = mark_safe(md.convert(source))

      try:
        meta = md.Meta
        meta['pk'] = pad.pk

        # Metadata values are lists; the first entry of 'type' selects the model.
        if 'type' not in meta:
          meta['type'] = ['pad']

        if meta['type'] == ['biography']:
          meta['type'] = ['produser']

        content_type = meta['type'][0]
        collection = collectionFor(content_type)

        key = modelFor(content_type).extractKey(meta)
        debug('Extracted key: {}'.format(key))

        model = collection.get(key)
        if model.empty:
          debug('Filling model {}'.format(key))
          model.fill(metadata=meta, content=content, source_path=pad.display_slug)
        else:
          error('Model for key {} already filled'.format(key))

      except UnknownContentTypeError as e:
        debug('Skipped `{}`'.format(name))
        debug(e)

    info('Read {}'.format(pad.display_slug))
gijs's avatar
gijs committed
79
80
81
82
83
84
85
86
87
88
89

class Command(BaseCommand):
  """Management command that parses all pads and logs the produser models."""

  args = ''
  help = 'Generate a static interpretation of the pads'

  def handle(self, *args, **options):
    """Parse every pad, then log each attribute value of every produser model."""
    parse_pads()

    produsers = collectionFor('produser').models
    for produser in produsers:
      attribute_names = dir(produser)
      for attribute_name in attribute_names:
        info(getattr(produser, attribute_name))

    # print(collectionFor('produser').models)
    # print(collectionFor('event').models, collectionFor('event').models[0].metadata, collectionFor('event').models[0].metadata['produser'].metadata)