parse.py 3.75 KB
Newer Older
gijs's avatar
gijs committed
1
2
3
4
import markdown
import os.path
import urllib

5
6
from .models import modelFor, collectionFor, UnknownContentTypeError, knownContentTypes
from .utils import info, debug, error, warn, keyFilter
gijs's avatar
gijs committed
7
8
9
10
11
12
13
14

from markdown.extensions.toc import TocExtension
from py_etherpad import EtherpadLiteClient

from django.core.management.base import BaseCommand, CommandError
from django.utils.safestring import mark_safe
from etherpadlite.models import Pad

15
16
from .settings import DEFAULT_CONTENT_TYPE

gijs's avatar
gijs committed
17
18
from ethertoff.settings import PAD_NAMESPACE_SEPARATOR, BASE_DIR, DEBUG

19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
"""
  
  We loop through all the pads and 'parse' them as markdown.
  This should return both the content and a dictionary for the metadata

  From this information a model is constructed. The metadata is further
  parsed depending on the field type.

  Links will try to look up their targets. If the pad isn't parsed yet a 
  stub is created to be filled later in the process. 

  TODO: decouple metadata parsing and linking. To make sure all data is seen
  before linking is performed.


34
35
36
37
  If both keys and labels are used to address models, then depending on the
  order in which they are encountered we might create one instance for the
  label and another for the key, especially when the label / title is later
  changed.

38
39
"""

gijs's avatar
gijs committed
40
41
42
43
44
45
46
47
48
49
50
def parse_pads():
  """
  Read every markdown pad from Etherpad and fill the matching model with it.

  For each ``Pad`` whose display slug ends in ``.md`` or ``.markdown`` the pad
  text is fetched through the Etherpad Lite API, converted with markdown, and
  its metadata is used to pick a content type and a key:

  * when the first metadata key is one of ``knownContentTypes`` it is the
    content type and its value (passed through ``keyFilter``) is the key;
  * otherwise the ``type`` metadata entry is used (``biography`` is rewritten
    to ``produser``), falling back to ``DEFAULT_CONTENT_TYPE``, and the key
    comes from the model class' ``extractKey``.

  The model stored under that key in the content type's collection is filled
  when it is still empty; otherwise an error is logged. When
  ``UnknownContentTypeError`` is raised during this processing the pad is
  skipped with a debug message. Pads with any other extension are only logged.
  """
  # Imported explicitly: a bare `import urllib` does not guarantee that the
  # `urllib.parse` submodule has been loaded.
  from urllib.parse import quote

  # One API client per distinct server, created on first use, so pads that
  # live on different servers are not queried with the wrong credentials.
  clients = {}

  for pad in Pad.objects.all():
    name, extension = os.path.splitext(pad.display_slug)

    info('Reading {}'.format(pad.display_slug))

    if extension in ['.md', '.markdown']:
      server = pad.server
      client_key = (server.apikey, server.apiurl)
      if client_key not in clients:
        clients[client_key] = EtherpadLiteClient(server.apikey, server.apiurl)
      epclient = clients[client_key]

      # Only fetch the text for pads that are actually parsed; the source of
      # other pads is never used.
      if pad.is_public:
        padID = pad.publicpadid
      else:
        padID = pad.group.groupID + '$' + quote(pad.name.replace(PAD_NAMESPACE_SEPARATOR, '_'))
      source = epclient.getText(padID)['text']

      md = markdown.Markdown(extensions=['extra', 'meta', TocExtension(baselevel=2), 'attr_list'])
      content = mark_safe(md.convert(source))

      try:
        meta = md.Meta
        meta['pk'] = pad.pk

        # If the first line of the metadata is a known content type,
        # use it as such. Its value becomes the key and potentially
        # the label. `meta` always holds at least 'pk', so there is
        # always a first item to look at.
        firstMetaKey, firstMetaValue = list(meta.items())[0]

        if firstMetaKey in knownContentTypes:
          contentType = firstMetaKey
          key = keyFilter(firstMetaValue)

          if 'type' in meta:
            warn('Both valid contenttype present in the first row ({0}) as well as a type declaration ({1}), using {0}'.format(contentType, meta['type'][0]), pad.display_slug)
        else:
          if 'type' in meta:
            # 'biography' is treated as an alias of 'produser'.
            if meta['type'] == ['biography']:
              meta['type'] = ['produser']
            contentType = meta['type'][0]
          else:
            contentType = DEFAULT_CONTENT_TYPE
          key = modelFor(contentType).extractKey(meta)

        collection = collectionFor(contentType)

        debug('Extracted key: {}'.format(key))
        model = collection.get(key=key)

        if model.empty:
          debug('Filling model {}'.format(key))
          model.fill(metadata=meta, content=content, source_path=pad.display_slug)
        else:
          error('Model for key {} already filled'.format(key))

      except UnknownContentTypeError as e:
        # Deliberate best effort: a pad with an unknown content type is
        # skipped, not treated as a fatal error.
        debug('Skipped `{}`'.format(name))
        debug(e)

    info('Read {}'.format(pad.display_slug))
gijs's avatar
gijs committed
98
99
100
101
102
103
104
105
106
107
108

class Command(BaseCommand):
  """Management command that parses all pads and logs the produser models."""

  args = ''
  help = 'Generate a static interpretation of the pads'

  def handle(self, *args, **options):
    """Parse every pad, then log each attribute value of every produser model."""
    parse_pads()

    produsers = collectionFor('produser').models
    for produser in produsers:
      # Dump every attribute that dir() reports for the model.
      attribute_names = dir(produser)
      for attribute_name in attribute_names:
        value = getattr(produser, attribute_name)
        info(value)
gijs's avatar
gijs committed
110
111
112

    # print(collectionFor('produser').models)
    # print(collectionFor('event').models, collectionFor('event').models[0].metadata, collectionFor('event').models[0].metadata['produser'].metadata)