Commit 51ee4b1e authored by gijs
Browse files

Page-breaks, columnbreaks, hacky fragility

parent 1f9d2ae9
......@@ -31,6 +31,79 @@ template = u"""<!DOCTYPE html>
</html>"""
def makeCircleHeader(title, soup):
    """Build the ASCII-art section header that frames *title*.

    The result is a ``<section class="asciiheaderwrapper groupheader center">``
    holding, in order: a ``<pre class="ascii">`` with the upper art, a
    ``<div class="asciiname">`` containing *title*, and a second
    ``<pre class="ascii">`` with the lower art.

    title -- text (or node) appended inside the name ``<div>``.
    soup  -- object whose ``new_tag`` creates the elements; the new section
             is returned detached, it is not inserted into *soup*.
    """
    # NOTE(review): every art row reads identically here; if the rows were
    # meant to draw a circle, their inner spacing may have been collapsed --
    # confirm against the rendered header.
    upper_art = (u'\n'
                 u'%%% %%%\n'
                 u'%%% %%%\n'
                 u'%%% %%%\n'
                 u'%%% %%%')
    lower_art = (u'%%% %%%\n'
                 u'%%% %%%\n'
                 u'%%% %%%\n'
                 u'%%% %%%')

    top = soup.new_tag('pre')
    top['class'] = 'ascii'
    top.append(upper_art)

    name = soup.new_tag('div')
    name['class'] = 'asciiname'
    name.append(title)

    bottom = soup.new_tag('pre')
    bottom['class'] = 'ascii'
    bottom.append(lower_art)

    wrapper = soup.new_tag('section')
    wrapper['class'] = 'asciiheaderwrapper groupheader center'
    for part in (top, name, bottom):
        wrapper.append(part)
    return wrapper
def makeBranch(level, tags, soup):
    """Turn the front of *tags* into a nested ``<ul>`` for one outline level.

    level -- the level this list represents.
    tags  -- list of ``{'tag': ..., 'level': int}`` dicts; entries are popped
             from the front as they are consumed, so the caller's list is
             mutated and shared with the recursive calls.
    soup  -- object whose ``new_tag`` creates the ``<ul>``/``<li>`` elements.

    Entries deeper than *level* become a nested list inside the most recent
    ``<li>``; an entry shallower than *level* ends this list and is left in
    *tags* for the caller. A deeper entry that arrives before any ``<li>``
    exists is added to this list as an ordinary item.
    """
    branch = soup.new_tag('ul')
    # The newest <li> is held back until the next sibling (or the end) so
    # that a nested list can still be appended to it first.
    pending = None

    while tags:
        depth = tags[0]['level']

        if depth > level and pending:
            pending.append(makeBranch(depth, tags, soup))
            continue

        # Both remaining cases start by flushing the held-back item.
        if pending:
            branch.append(pending)
            pending = None

        if depth < level:
            return branch

        pending = soup.new_tag('li')
        pending.append(tagContent(tags[0]['tag']))
        tags.pop(0)

    if pending:
        branch.append(pending)
    return branch
def makeIndex(soup):
    """Build a nested ``<ul>`` index of the headings and list items in *soup*.

    Every ``h1``-``h5`` and ``li`` returned by ``soup.find_all`` is copied and
    given a level equal to its position in ``eligible_tags`` (so ``li`` sits
    one level below ``h5``). The copies are then handed to ``makeBranch``,
    starting at the level of the first match.

    Returns the root ``<ul>``; when *soup* contains none of the eligible
    tags an empty ``<ul>`` is returned instead of failing on ``tags[0]``.
    """
    eligible_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'li']
    # Copies are used so building the index never moves nodes out of the
    # source document.
    tags = [
        {'tag': copy.copy(tag), 'level': eligible_tags.index(tag.name)}
        for tag in soup.find_all(eligible_tags)
    ]
    if not tags:
        return soup.new_tag('ul')
    return makeBranch(tags[0]['level'], tags, soup)
def tagContent(tag):
if tag.string:
return tag.string
......@@ -38,13 +111,16 @@ def tagContent(tag):
return ''.join(tag.strings)
# Explicit ASCII classes instead of \s and \w: those shorthands become
# Unicode-aware on Python 3 (or with re.UNICODE), which would let accented
# letters through to the ASCII encode below and turn them into '?'.
_CLASS_SAFE_WHITESPACE = re.compile(r'[ \t\n\r\f\v]+')
_CLASS_SAFE_REJECT = re.compile(r'[^A-Za-z0-9_\-]+')


def classSafeContent(string):
    """Return an ASCII slug of *string* usable as a CSS class or element id.

    The text is lower-cased, each run of whitespace becomes a single ``-``,
    and every character other than ASCII letters, digits, ``_`` and ``-`` is
    dropped (non-ASCII letters are removed, not transliterated).

    string -- the text to convert.

    Returns the slug encoded as ASCII (a byte string), as before.
    """
    hyphenated = _CLASS_SAFE_WHITESPACE.sub('-', string.lower())
    return _CLASS_SAFE_REJECT.sub('', hyphenated).encode("ascii", "replace")
def makeLemma(title, url, bigSoup):
print url
lemmaSoup = bs(urllib.urlopen(
'{0}?action=render'.format(url)), 'html.parser')
lemma = bigSoup.new_tag("section")
lemma['class'] = 'lemma {}'.format(
re.sub(r'[^\w\-]+', '', re.sub(r'\s+', '-', title.lower())).encode("ascii", "ignore"))
lemma['class'] = 'lemma {}'.format(classSafeContent(title))
sectiontitle = tagContent(child)
......@@ -70,6 +146,16 @@ def makeLemma(title, url, bigSoup):
return lemma
def pageBreaker(soup):
    """Create a ``<section class="page-breaker">`` element.

    soup -- object whose ``new_tag`` creates the element.

    Returns the new, detached section; its only content is a single-space
    string.
    """
    section = soup.new_tag('section')
    section.string = u' '
    section['class'] = 'page-breaker'
    return section
# Header ids (as produced by classSafeContent from an <h2> title) for which
# the main loop also appends a pageBreaker() section to the container.
doublebreak = ['algoliterary-explorations']
# (language code, URL) pairs. The main loop iterates them as (lang, url) and
# writes one 'catalog.<lang>.html' file per entry.
pages = (('en', 'http://www.algolit.net/index.php/Algoliterary_Encounters?action=render'),
('fr', 'http://algolit.net/index.php/Rencontres_Algolittéraires?action=render'))
......@@ -80,6 +166,13 @@ for (lang, url) in pages:
container = soup.new_tag('section')
container['class'] = u'language {}'.format(lang)
index = soup.new_tag('section')
index.attrs['class'] = 'index'
index.append(makeCircleHeader('Algoliterary Encounters' if (
lang == 'en') else 'Rencontres Algolittéraires', soup))
index.append(makeIndex(pageSoup))
soup.append(index)
for child in pageSoup.contents:
# print child.name
if child.name == 'ul':
......@@ -97,38 +190,15 @@ for (lang, url) in pages:
container.append(chapter)
elif child.name == 'h2':
sectiontitle = tagContent(child)
title = tagContent(child)
hid = classSafeContent(title)
header = makeCircleHeader(title, soup)
header.attrs['id'] = hid
asciii1 = soup.new_tag('pre')
asciii1['class'] = 'ascii'
asciii1.append(u'''
%%% %%%
%%% %%%
if (hid in doublebreak):
container.append(pageBreaker(soup))
%%% %%%
%%% %%%''')
asciiiname = soup.new_tag('div')
asciiiname['class'] = 'asciiname'
asciii2 = soup.new_tag('pre')
asciii2['class'] = 'ascii'
asciii2.append(u'''%%% %%%
%%% %%%
%%% %%%
%%% %%%''')
asciiiname.append(sectiontitle)
headerwrapper = soup.new_tag('section')
headerwrapper['class'] = 'asciiheaderwrapper groupheader center'
headerwrapper.append(asciii1)
headerwrapper.append(asciiiname)
headerwrapper.append(asciii2)
container.append(headerwrapper)
container.append(header)
elif child.name == 'h3':
sectiontitle = child.text
......@@ -150,6 +220,10 @@ for (lang, url) in pages:
else:
container.append(copy.copy(child))
for header in container.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
header.attrs['id'] = classSafeContent(tagContent(header))
header.replace_with(header)
soup.append(container)
with codecs.open('catalog.{}.html'.format(lang), 'w', encoding='utf-8') as out:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment