Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
algolit
algolit
Commits
51ee4b1e
Commit
51ee4b1e
authored
Nov 02, 2017
by
gijs
Browse files
Page-breaks, columnbreaks, hacky fragility
parent
1f9d2ae9
Changes
1
Hide whitespace changes
Inline
Side-by-side
algoliterary_encounter/catalog/makeCatalog.py
View file @
51ee4b1e
...
...
@@ -31,6 +31,79 @@ template = u"""<!DOCTYPE html>
</html>"""
def makeCircleHeader(title, soup):
    """Build the ASCII-art framed header section for *title*.

    The result is a ``<section class="asciiheaderwrapper groupheader center">``
    holding, in order: a ``<pre class="ascii">`` with the upper art, a
    ``<div class="asciiname">`` containing *title*, and a second
    ``<pre class="ascii">`` with the lower art.

    :param title: text to place between the two pieces of ASCII art.
    :param soup: object whose ``new_tag`` is used to create the elements.
    :return: the newly created wrapper element (not attached to any parent).
    """
    # NOTE(review): the horizontal spacing inside the art below is reproduced
    # as found; confirm it against the repository copy before relying on it.
    upper_art = u'''
%%% %%%
%%% %%%
%%% %%%
%%% %%%'''
    lower_art = u'''%%% %%%
%%% %%%
%%% %%%
%%% %%%'''

    upper = soup.new_tag('pre')
    upper['class'] = 'ascii'
    upper.append(upper_art)

    label = soup.new_tag('div')
    label['class'] = 'asciiname'
    label.append(title)

    lower = soup.new_tag('pre')
    lower['class'] = 'ascii'
    lower.append(lower_art)

    wrapper = soup.new_tag('section')
    wrapper['class'] = 'asciiheaderwrapper groupheader center'
    for part in (upper, label, lower):
        wrapper.append(part)
    return wrapper
def makeBranch(level, tags, soup):
    """Turn a flat list of levelled tags into a nested ``<ul>`` tree.

    Entries are consumed from the front of *tags* (the list is mutated in
    place). An entry deeper than *level* is rendered as a nested ``<ul>``
    inside the most recent ``<li>``; an entry shallower than *level* ends this
    branch and is left in *tags* for the caller to handle.

    :param level: nesting level this branch represents.
    :param tags: list of dicts with a ``'level'`` number and a ``'tag'``
        element; shortened as entries are consumed.
    :param soup: object whose ``new_tag`` is used to create the elements.
    :return: the ``<ul>`` element for this branch.
    """
    branch = soup.new_tag('ul')
    pending = None  # the <li> currently being filled, not yet attached

    while len(tags) > 0:
        head = tags[0]
        depth = head['level']

        if depth > level and pending:
            # Deeper entry: the recursive call consumes the whole sub-list.
            pending.append(makeBranch(depth, tags, soup))
            continue

        # Every other case first attaches the item built so far.
        if pending:
            branch.append(pending)
            pending = None

        if depth < level:
            # Shallower entry belongs to an ancestor; leave it in `tags`.
            return branch

        pending = soup.new_tag('li')
        pending.append(tagContent(head['tag']))
        tags.pop(0)

    if pending:
        branch.append(pending)
    return branch
def makeIndex(soup):
    """Build a nested ``<ul>`` index of the headings and list items in *soup*.

    Every ``h1``-``h5`` and ``li`` element found in *soup* is copied and given
    a level equal to its position in ``eligible_tags`` (``h1`` is 0, ``li`` is
    5). The levelled list is then handed to ``makeBranch``, starting at the
    level of the first element found.

    :param soup: parsed document to index; also used to create new elements.
    :return: the ``<ul>`` element produced by ``makeBranch``, or an empty
        ``<ul>`` when *soup* contains none of the eligible tags.
    """
    eligible_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'li']

    # Copies are used so that building the index does not move the original
    # elements out of the document.
    tags = [
        {'tag': copy.copy(tag), 'level': eligible_tags.index(tag.name)}
        for tag in soup.find_all(eligible_tags)
    ]

    if not tags:
        # Nothing to index: return an empty list instead of failing on tags[0].
        return soup.new_tag('ul')

    return makeBranch(tags[0]['level'], tags, soup)
def
tagContent
(
tag
):
if
tag
.
string
:
return
tag
.
string
...
...
@@ -38,13 +111,16 @@ def tagContent(tag):
return
''
.
join
(
tag
.
strings
)
def classSafeContent(string):
    """Reduce *string* to an ASCII token usable as a CSS class or element id.

    The text is lower-cased, every run of whitespace becomes a single hyphen,
    and any remaining character other than ASCII letters, digits, underscore
    and hyphen is dropped.

    :param string: text to convert (for example a section title).
    :return: the converted text, encoded as ASCII.
    """
    hyphenated = re.sub(r'\s+', '-', string.lower())
    # The allowed set is spelled out instead of using \w: where \w is
    # Unicode-aware, accented letters would survive this step and then be
    # turned into '?' by the ASCII encoding below, which is not class-safe.
    safe = re.sub(r'[^a-zA-Z0-9_\-]+', '', hyphenated)
    return safe.encode("ascii", "replace")
def
makeLemma
(
title
,
url
,
bigSoup
):
print
url
lemmaSoup
=
bs
(
urllib
.
urlopen
(
'{0}?action=render'
.
format
(
url
)),
'html.parser'
)
lemma
=
bigSoup
.
new_tag
(
"section"
)
lemma
[
'class'
]
=
'lemma {}'
.
format
(
re
.
sub
(
r
'[^\w\-]+'
,
''
,
re
.
sub
(
r
'\s+'
,
'-'
,
title
.
lower
())).
encode
(
"ascii"
,
"ignore"
))
lemma
[
'class'
]
=
'lemma {}'
.
format
(
classSafeContent
(
title
))
sectiontitle
=
tagContent
(
child
)
...
...
@@ -70,6 +146,16 @@ def makeLemma(title, url, bigSoup):
return
lemma
def pageBreaker(soup):
    """Create a ``<section class="page-breaker">`` placeholder element.

    :param soup: object whose ``new_tag`` is used to create the element.
    :return: the new section, whose only content is a single-space string.
    """
    section = soup.new_tag('section')
    section['class'] = 'page-breaker'
    section.string = u' '
    return section
# Heading ids (as produced by classSafeContent) that get an extra page-breaker
# section appended to the container.
doublebreak = ['algoliterary-explorations']

# (language code, source URL) pairs; one catalog is generated per entry.
pages = (
    ('en', 'http://www.algolit.net/index.php/Algoliterary_Encounters?action=render'),
    ('fr', 'http://algolit.net/index.php/Rencontres_Algolittéraires?action=render'),
)
...
...
@@ -80,6 +166,13 @@ for (lang, url) in pages:
container
=
soup
.
new_tag
(
'section'
)
container
[
'class'
]
=
u
'language {}'
.
format
(
lang
)
index
=
soup
.
new_tag
(
'section'
)
index
.
attrs
[
'class'
]
=
'index'
index
.
append
(
makeCircleHeader
(
'Algoliterary Encounters'
if
(
lang
==
'en'
)
else
'Rencontres Algolittéraires'
,
soup
))
index
.
append
(
makeIndex
(
pageSoup
))
soup
.
append
(
index
)
for
child
in
pageSoup
.
contents
:
# print child.name
if
child
.
name
==
'ul'
:
...
...
@@ -97,38 +190,15 @@ for (lang, url) in pages:
container
.
append
(
chapter
)
elif
child
.
name
==
'h2'
:
sectiontitle
=
tagContent
(
child
)
title
=
tagContent
(
child
)
hid
=
classSafeContent
(
title
)
header
=
makeCircleHeader
(
title
,
soup
)
header
.
attrs
[
'id'
]
=
hid
asciii1
=
soup
.
new_tag
(
'pre'
)
asciii1
[
'class'
]
=
'ascii'
asciii1
.
append
(
u
'''
%%% %%%
%%% %%%
if
(
hid
in
doublebreak
):
container
.
append
(
pageBreaker
(
soup
))
%%% %%%
%%% %%%'''
)
asciiiname
=
soup
.
new_tag
(
'div'
)
asciiiname
[
'class'
]
=
'asciiname'
asciii2
=
soup
.
new_tag
(
'pre'
)
asciii2
[
'class'
]
=
'ascii'
asciii2
.
append
(
u
'''%%% %%%
%%% %%%
%%% %%%
%%% %%%'''
)
asciiiname
.
append
(
sectiontitle
)
headerwrapper
=
soup
.
new_tag
(
'section'
)
headerwrapper
[
'class'
]
=
'asciiheaderwrapper groupheader center'
headerwrapper
.
append
(
asciii1
)
headerwrapper
.
append
(
asciiiname
)
headerwrapper
.
append
(
asciii2
)
container
.
append
(
headerwrapper
)
container
.
append
(
header
)
elif
child
.
name
==
'h3'
:
sectiontitle
=
child
.
text
...
...
@@ -150,6 +220,10 @@ for (lang, url) in pages:
else
:
container
.
append
(
copy
.
copy
(
child
))
for
header
in
container
.
find_all
([
'h1'
,
'h2'
,
'h3'
,
'h4'
,
'h5'
,
'h6'
]):
header
.
attrs
[
'id'
]
=
classSafeContent
(
tagContent
(
header
))
header
.
replace_with
(
header
)
soup
.
append
(
container
)
with
codecs
.
open
(
'catalog.{}.html'
.
format
(
lang
),
'w'
,
encoding
=
'utf-8'
)
as
out
:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment