Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
osp
tools.ethertoff
Commits
22f64984
Commit
22f64984
authored
Nov 18, 2019
by
alexandre
Browse files
Changed index code to avoid making http requests
parent
491afc46
Changes
4
Hide whitespace changes
Inline
Side-by-side
ethertoff/management/commands/rdfindex2.py
View file @
22f64984
...
...
@@ -5,6 +5,25 @@ from django.urls import reverse
from
aasniff
import
AAApp
import
markdown
from
markdown.extensions.toc
import
TocExtension
from
django.test
import
Client
import
html5lib
from
rdflib.plugins.memory
import
IOMemory
import
rdflib
def tidy(string):
    """Parse ``string`` as HTML and return the document serialised as XML.

    The markup is parsed by html5lib into a DOM tree. The root ``<html>``
    element is given an ``xmlns`` attribute for the XHTML namespace when it
    does not already carry one, then the whole document is serialised with
    ``toxml()``.
    """
    dom_builder = html5lib.treebuilders.getTreeBuilder("dom")
    document = html5lib.HTMLParser(tree=dom_builder).parse(string)

    # FIXME: remove this? we moved to rdflib
    # Redland crashes if no xmlns attribute is declared
    # (see http://bugs.librdf.org/mantis/view.php?id=521),
    # so declare it ourselves in the meantime.
    root = document.getElementsByTagName("html")[0]
    has_namespace = root.hasAttribute("xmlns")
    if not has_namespace:
        root.setAttribute("xmlns", "http://www.w3.org/1999/xhtml")

    return document.toxml()
class
Conf
(
object
):
...
...
@@ -29,30 +48,27 @@ class Command(BaseCommand):
app
=
AAApp
(
conf
=
Conf
)
for
pad
in
Pad
.
objects
.
filter
():
# TODO: make a Pad method for that
client
=
pad
.
epclient
pad_id
=
pad
.
padid
text
=
client
.
getText
(
pad_id
)[
'text'
]
# store = IOMemory()
# graph = rdflib.graph.ConjunctiveGraph(store=store)
for
pad
in
Pad
.
objects
.
filter
():
c
=
Client
()
path
=
reverse
(
'pad-read'
,
kwargs
=
{
'mode'
:
'r'
,
'slug'
:
pad
.
display_slug
})
# FIXME: handle https as well
response
=
c
.
get
(
path
)
url
=
f
"http://
{
domain
}{
path
}
"
md
=
markdown
.
Markdown
(
extensions
=
[
'extra'
,
'meta'
,
TocExtension
(
baselevel
=
2
),
'attr_list'
])
text
=
md
.
convert
(
text
)
html
=
f
"""<!DOCTYPE html>
<html lang="en">
<head>
<title>Example Document</title>
</head>
<body>
{
text
}
</body>
</html>"""
print
(
html
)
# print(f"parsing {url}")
# app.graph.parse(data=html, format="rdfa", publicID=url)
print
(
f
"parsing
{
url
}
"
)
if
response
.
status_code
==
200
:
try
:
app
.
graph
.
parse
(
data
=
tidy
(
response
.
content
),
format
=
"rdfa"
,
publicID
=
url
)
except
:
print
(
f
"couldn't parse
{
url
}
"
)
else
:
print
(
f
"failed to parse
{
url
}
"
)
# for quad in graph.quads():
# # avoids duplicate statements
# app.graph.remove(quad)
# # adds the new statements
# app.graph.add(quad)
ethertoff/templates/partials/all_authors.html
0 → 100644
View file @
22f64984
<ul>{# One list item per entry of the "authors" context variable; only the first element (index 0) of each entry is printed. #}
{% for author in authors %}
<li>
{{ author.0 }}
</li>
{% endfor %}
</ul>
ethertoff/templatetags/index_tags.py
0 → 100644
View file @
22f64984
from
django
import
template
from
aasniff
import
AAApp
register
=
template
.
Library
()
class Conf(object):
    """Settings object handed to ``AAApp`` through its ``conf`` argument."""

    # Names of the sniffers to enable.
    SNIFFERS = ['HttpSniffer', 'HtmlSniffer']

    # Store settings: engine name and store name.
    STORE = {'ENGINE': 'sqlite', 'NAME': 'aasniff.sqlite'}
@register.inclusion_tag('partials/all_authors.html')
def all_authors():
    """Inclusion tag rendering ``partials/all_authors.html``.

    Runs a SPARQL query against the ``AAApp`` graph for every distinct
    object of a ``dcterms:contributor`` statement and passes the query
    result to the template as ``authors``.
    """
    sparql = """
SELECT DISTINCT ?object
WHERE {
?subject <http://purl.org/dc/terms/contributor> ?object.
}
"""
    graph = AAApp(conf=Conf).graph
    contributors = graph.query(sparql)
    return {"authors": contributors}
ethertoff/views.py
View file @
22f64984
...
...
@@ -596,7 +596,6 @@ def pad_read(request, mode="r", slug=None):
meta_list
=
list
(
meta
.
items
())
print
(
meta_list
)
tpl_params
=
{
'pad'
:
pad
,
'meta'
:
meta
,
# to access by hash, like meta.author
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment