parse.py 5.09 KB
Newer Older
gijs's avatar
gijs committed
1
2
3
4
import markdown
import os.path
import urllib

gijs's avatar
gijs committed
5
from .models import modelFor, collectionFor, UnknownContentTypeError, knownContentTypes, resolveReferences
6
from .utils import info, debug, error, warn, keyFilter
gijs's avatar
gijs committed
7
8
9
10
11
12
13
14

from markdown.extensions.toc import TocExtension
from py_etherpad import EtherpadLiteClient

from django.core.management.base import BaseCommand, CommandError
from django.utils.safestring import mark_safe
from etherpadlite.models import Pad

15
16
from .settings import DEFAULT_CONTENT_TYPE

gijs's avatar
gijs committed
17
18
from ethertoff.settings import PAD_NAMESPACE_SEPARATOR, BASE_DIR, DEBUG

19
20
from generator.extract_meta import extract_meta

21
22
23
24
25
26
27
28
"""
  
  We loop through all the pads and 'parse' them as markdown.
  This should return both the content and a dictionary for the metadata

  From this information a model is constructed. The metadata is further
  parsed depending on the field type.

29
30
  Links will try to look up their targets. If the target pad isn't parsed yet
  a stub is created to be filled later in the process. 
31
32
33
34

  TODO: decouple metadata parsing and linking. To make sure all data is seen
  before linking is performed.

35
36
37
38
  Keys and labels can both be used to address models, so depending on the
  order in which they are encountered we might create one instance for the
  label and another for the key, especially when the label / title is later
  changed.

39
40
"""

gijs's avatar
gijs committed
41
42
def _etherpad_id(pad):
  """Return the identifier under which `pad` is addressed in the Etherpad API.

  Public pads use their `publicpadid`; other pads use
  `<groupID>$<url-quoted name>`, with PAD_NAMESPACE_SEPARATOR replaced by an
  underscore in the name.
  """
  # Imported here because the module only does `import urllib`, which does not
  # guarantee that the `urllib.parse` submodule is loaded.
  from urllib.parse import quote

  if pad.is_public:
    return pad.publicpadid

  return pad.group.groupID + '$' + quote(pad.name.replace(PAD_NAMESPACE_SEPARATOR, '_'))


def _content_type_key_and_label(meta, slug):
  """Derive `(contentType, key, label)` from a pad's metadata.

  If the first metadata entry is a known content type it is used as such; its
  value is filtered into the key and is also used as the label. Otherwise the
  content type comes from the `type` entry (falling back to
  DEFAULT_CONTENT_TYPE), the key is extracted by the model for that content
  type, and the label is None.

  `meta` may be modified: an outdated `['biography']` type is rewritten to
  `['produser']`. `slug` is only used in log messages.
  """
  firstMetaKey, firstMetaValue = next(iter(meta.items()))

  if firstMetaKey in knownContentTypes:
    contentType = firstMetaKey
    key = keyFilter(firstMetaValue)

    if 'type' in meta:
      warn('Both valid contenttype present in the first row ({0}) as well as a type declaration ({1}), using {0}'.format(contentType, meta['type'][0]), slug)

    return contentType, key, firstMetaValue

  if 'type' in meta:
    if meta['type'] == ['biography']:
      warn("Outdated contenttype biography. for pad: {}".format(slug))
      meta['type'] = ['produser']
    contentType = meta['type'][0]
  else:
    debug("No contenttype found, applied default contenttype for pad: {}".format(slug))
    contentType = DEFAULT_CONTENT_TYPE

  return contentType, modelFor(contentType).extractKey(meta), None


def parse_pads():
  """Parse all markdown pads into models and render their content.

  First pass: for every Pad whose display slug ends in `.md` or `.markdown`,
  the pad text is fetched through the Etherpad API, split into metadata and
  content, and instantiated as a model in the collection for its content type.
  Pads with an unknown content type are skipped (logged at debug level).

  Second pass: for every model that has content, references are resolved,
  links are resolved and the markdown is converted and marked safe.

  Returns the list of instantiated models.
  """
  epclient = None
  models = []

  for pad in Pad.objects.all():
    name, extension = os.path.splitext(pad.display_slug)

    info('Reading {}'.format(pad.display_slug))

    # Only markdown pads are parsed, so only those are fetched.
    if extension in ['.md', '.markdown']:
      if not epclient:
        # NOTE(review): a single client, built from the first markdown pad's
        # server, is reused for all pads -- presumably every pad lives on the
        # same Etherpad server; confirm.
        epclient = EtherpadLiteClient(pad.server.apikey, pad.server.apiurl)

      source = epclient.getText(_etherpad_id(pad))['text']
      meta, content = extract_meta(source)

      try:
        meta['pk'] = pad.pk

        contentType, key, label = _content_type_key_and_label(meta, pad.display_slug)
        collection = collectionFor(contentType)

        debug('Extracted key: {}'.format(key))
        model = collection.instantiate(key=key, label=label, metadata=meta, content=content, source_path=pad.display_slug)
        models.append(model)
      except UnknownContentTypeError as e:
        debug('Skipped `{}`'.format(name))
        debug(e)

    info('Read {}'.format(pad.display_slug))

  # Linking happens only after every pad has been read, so that all models
  # exist before references between them are resolved.
  for m in models:
    if m.content:
      # The second return value holds the collected references; unused here.
      content, _ = resolveReferences(m.content, model=m)
      m.resolveLinks()
      # A fresh Markdown instance per model keeps converter state isolated.
      md = markdown.Markdown(extensions=['extra', TocExtension(baselevel=2), 'attr_list'])
      m.content = mark_safe(md.convert(content))

  return models
128

gijs's avatar
gijs committed
129
130
131
132
133
134
135
136
137
138
class Command(BaseCommand):
  """Management command that parses all pads and logs every produser attribute."""

  help = 'Generate a static interpretation of the pads'
  args = ''

  def handle(self, *args, **options):
    """Parse the pads, then log each attribute value of each produser model."""
    parse_pads()

    produsers = collectionFor('produser').models

    for produser in produsers:
      # dir() lists every attribute name, so this is a full dump of the model.
      for attribute_name in dir(produser):
        attribute_value = getattr(produser, attribute_name)
        info(attribute_value)
gijs's avatar
gijs committed
140
141
142

    # print(collectionFor('produser').models)
    # print(collectionFor('event').models, collectionFor('event').models[0].metadata, collectionFor('event').models[0].metadata['produser'].metadata)