parse.py 3.9 KB
Newer Older
gijs's avatar
gijs committed
1
2
3
4
import markdown
import os.path
import urllib

5
from .models import modelFor, collectionFor, UnknownContentTypeError, knownContentTypes, resolveReferences
6
from .utils import info, debug, error, warn, keyFilter
gijs's avatar
gijs committed
7
8
9
10
11
12
13
14

from markdown.extensions.toc import TocExtension
from py_etherpad import EtherpadLiteClient

from django.core.management.base import BaseCommand, CommandError
from django.utils.safestring import mark_safe
from etherpadlite.models import Pad

15
16
from .settings import DEFAULT_CONTENT_TYPE

gijs's avatar
gijs committed
17
18
from ethertoff.settings import PAD_NAMESPACE_SEPARATOR, BASE_DIR, DEBUG

19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
"""
  
  We loop through all the pads and 'parse' them as markdown.
  This should return both the content and a dictionary for the metadata

  From this information a model is constructed. The metadata is further
  parsed depending on the field type.

  Links will try to look up their targets. If the pad isn't parsed yet a 
  stub is created to be filled later in the process. 

  TODO: decouple metadata parsing and linking, to make sure all data is seen
  before linking is performed.


34
35
36
37
  If both keys and labels are used to address models then, depending on the
  order in which they are encountered, we might create one instance for the
  label and another for the key, especially when the label / title is changed later.

38
39
"""

gijs's avatar
gijs committed
40
41
def _etherpad_pad_id(pad):
  """Return the identifier under which the text of `pad` is requested from Etherpad.

  Public pads use their `publicpadid`. Other pads are addressed as
  `<groupID>$<name>`, where PAD_NAMESPACE_SEPARATOR in the name is replaced by
  an underscore and the result is URL-quoted.
  """
  # Imported here because the module only does `import urllib`, which does not
  # guarantee that the `urllib.parse` submodule is loaded.
  from urllib.parse import quote

  if pad.is_public:
    return pad.publicpadid

  return pad.group.groupID + '$' + quote(pad.name.replace(PAD_NAMESPACE_SEPARATOR, '_'))


def _content_type_and_key(meta, source_path):
  """Derive the content type and the model key from the metadata of a pad.

  If the first metadata key is a known content type it is used as such, and
  its value (passed through keyFilter) becomes the key. Otherwise the `type`
  entry decides, falling back to DEFAULT_CONTENT_TYPE, and the key is
  extracted by the model class for that content type.

  Note: `meta` is modified in place when its type is `biography`; that type
  is rewritten to `produser`.

  Args:
    meta: metadata dictionary of the pad; values are indexed as lists here.
    source_path: slug of the pad, passed along with the warning.

  Returns:
    A `(contentType, key)` tuple.
  """
  firstMetaKey, firstMetaValue = next(iter(meta.items()))

  if firstMetaKey in knownContentTypes:
    key = keyFilter(firstMetaValue)

    if 'type' in meta:
      warn('Both valid contenttype present in the first row ({0}) as well as a type declaration ({1}), using {0}'.format(firstMetaKey, meta['type'][0]), source_path)

    return firstMetaKey, key

  if 'type' in meta:
    if meta['type'] == ['biography']:
      meta['type'] = ['produser']
    contentType = meta['type'][0]
  else:
    contentType = DEFAULT_CONTENT_TYPE

  return contentType, modelFor(contentType).extractKey(meta)


def parse_pads():
  """Read all markdown pads and turn each of them into a model.

  Pads whose display slug ends in `.md` or `.markdown` are fetched from
  Etherpad, converted with markdown, and their metadata is used to look up a
  model in the collection for their content type. An empty model is filled
  with the metadata and the rendered content; a model that was already filled
  is reported through `error` and left untouched. Other pads are only logged.

  Pads for which UnknownContentTypeError is raised are skipped (presumably
  raised by modelFor / collectionFor for unregistered types; confirm in
  .models).

  Returns:
    A list with the model found for every parsed pad, in iteration order. A
    model is listed again when a later pad resolves to the same key.
  """
  # One API client per Etherpad server, created on first use. Every pad
  # carries its own server, so a single shared client is not safe.
  clients = {}
  models = []

  for pad in Pad.objects.all():
    name, extension = os.path.splitext(pad.display_slug)

    info('Reading {}'.format(pad.display_slug))

    if extension in ('.md', '.markdown'):
      server = pad.server
      client_key = (server.apiurl, server.apikey)
      if client_key not in clients:
        clients[client_key] = EtherpadLiteClient(server.apikey, server.apiurl)

      # The text is only requested for pads that are actually parsed.
      source = clients[client_key].getText(_etherpad_pad_id(pad))['text']
      source = resolveReferences(source, source=None)

      md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(baselevel=2), 'attr_list'])
      content = mark_safe(md.convert(source))

      # The 'meta' extension exposes the parsed metadata after convert().
      # 'pk' is added last, so it is only the first entry when the pad has
      # no metadata of its own.
      meta = md.Meta
      meta['pk'] = pad.pk

      try:
        contentType, key = _content_type_and_key(meta, pad.display_slug)
        collection = collectionFor(contentType)

        debug('Extracted key: {}'.format(key))
        model = collection.get(key=key)
        models.append(model)

        if model.empty:
          debug('Filling model {}'.format(key))
          model.fill(metadata=meta, content=content, source_path=pad.display_slug)
        else:
          error('Model for key {} already filled'.format(key))
      except UnknownContentTypeError as e:
        debug('Skipped `{}`'.format(name))
        debug(e)

    info('Read {}'.format(pad.display_slug))

  return models
105

gijs's avatar
gijs committed
106
107
108
109
110
111
112
113
114
115
class Command(BaseCommand):
  """Management command that parses all pads and logs the 'produser' models."""
  args = ''
  help = 'Generate a static interpretation of the pads'

  def handle(self, *args, **options):
    """Parse the pads, then log every attribute value of each 'produser' model."""
    parse_pads()

    produsers = collectionFor('produser').models
    for produser in produsers:
      # One log line per name reported by dir(), in the order dir() gives them.
      for attribute_name in dir(produser):
        attribute_value = getattr(produser, attribute_name)
        info(attribute_value)
gijs's avatar
gijs committed
117
118
119

    # print(collectionFor('produser').models)
    # print(collectionFor('event').models, collectionFor('event').models[0].metadata, collectionFor('event').models[0].metadata['produser'].metadata)