Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
osp
tools.ethertoff
Commits
491afc46
Commit
491afc46
authored
Sep 15, 2019
by
alexandre
Browse files
WIP: testing parsing and storing RDFa with RDFlib
parent
13da1aae
Changes
3
Hide whitespace changes
Inline
Side-by-side
ethertoff/management/commands/rdfindex.py
0 → 100644
View file @
491afc46
from
django.core.management.base
import
BaseCommand
from
etherpadlite.models
import
Pad
from
django.contrib.sites.models
import
Site
from
django.urls
import
reverse
from
aasniff
import
AAApp
class Conf:
    """Settings object passed to ``AAApp`` via its ``conf`` argument."""

    # Sniffer names; presumably resolved by aasniff -- confirm against its docs.
    SNIFFERS = ['HttpSniffer', 'HtmlSniffer']

    # Store description: engine identifier and database name.
    STORE = dict(ENGINE='sqlite', NAME='aasniff.sqlite')
class Command(BaseCommand):
    """Management command that feeds every pad's read-mode URL to ``AAApp.index``."""

    args = ''
    help = 'Indexes pages'

    def handle(self, *args, **options):
        """Index the read-mode page of every pad.

        The host name is the domain of the first ``Site`` record. Each pad's
        ``pad-read`` URL is built from it and handed to ``app.index``. A
        failure on one pad is reported on stderr and does not stop the run.

        Returns:
            The string "No site domain settings found" when there is no
            ``Site`` record or its domain is empty; otherwise ``None``.
        """
        # first() yields None on an empty table, so a single query replaces
        # the former count() + all()[0] pair.
        site = Site.objects.first()
        if site is None or not site.domain:
            return "No site domain settings found"

        host = f"http://{site.domain}"
        app = AAApp(conf=Conf)

        for pad in Pad.objects.all():
            path = reverse(
                'pad-read',
                kwargs={'mode': 'r', 'slug': pad.display_slug},
            )
            url = f"{host}{path}"
            try:
                app.index(url)
            except Exception as exc:
                # Best effort: keep going, but say why this URL failed.
                # Exception (not a bare except) lets Ctrl-C / SystemExit
                # still abort the command.
                self.stderr.write(f"fail at indexing {url}: {exc}")
            else:
                self.stdout.write(f"indexed {url}")
ethertoff/management/commands/rdfindex2.py
0 → 100644
View file @
491afc46
from
django.core.management.base
import
BaseCommand
from
etherpadlite.models
import
Pad
from
django.contrib.sites.models
import
Site
from
django.urls
import
reverse
from
aasniff
import
AAApp
import
markdown
from
markdown.extensions.toc
import
TocExtension
class Conf:
    """Settings object passed to ``AAApp`` via its ``conf`` argument."""

    # Sniffer names; presumably resolved by aasniff -- confirm against its docs.
    SNIFFERS = ['HttpSniffer', 'HtmlSniffer']

    # Store description: engine identifier and database name.
    STORE = dict(ENGINE='sqlite', NAME='aasniff.sqlite')
class Command(BaseCommand):
    """Render each pad's Markdown text as a standalone HTML page and print it.

    Work in progress: the RDFa parsing step that would consume the HTML is
    still disabled (see the commented lines at the end of ``handle``).
    """

    args = ''
    help = 'Indexes pages'

    def handle(self, *args, **options):
        """Fetch every pad's text, convert it to HTML and write it to stdout."""
        # Site is already imported at module level; no local import needed.
        domain = Site.objects.get_current().domain
        # Instantiated up front; its graph is the target of the disabled
        # parse step below.
        app = AAApp(conf=Conf)

        for pad in Pad.objects.all():
            # TODO: make a Pad method for that
            text = pad.epclient.getText(pad.padid)['text']

            path = reverse(
                'pad-read',
                kwargs={'mode': 'r', 'slug': pad.display_slug},
            )
            # FIXME: handle https as well
            url = f"http://{domain}{path}"  # only used by the disabled parse step

            html = self._render_html(text)
            self.stdout.write(html)

            # self.stdout.write(f"parsing {url}")
            # app.graph.parse(data=html, format="rdfa", publicID=url)

    @staticmethod
    def _render_html(source):
        """Convert Markdown ``source`` to HTML wrapped in a minimal document."""
        # A fresh Markdown instance per document, so no converter state
        # carries over from one pad to the next.
        converter = markdown.Markdown(
            extensions=[
                'extra',
                'meta',
                TocExtension(baselevel=2),
                'attr_list',
            ],
        )
        body = converter.convert(source)
        return f"""<!DOCTYPE html>
<html lang="en">
<head>
<title>Example Document</title>
</head>
<body>
{body}
</body>
</html>"""
ethertoff/management/commands/rdfshow.py
0 → 100644
View file @
491afc46
from
django.core.management.base
import
BaseCommand
from
etherpadlite.models
import
Pad
from
django.contrib.sites.models
import
Site
from
django.urls
import
reverse
from
aasniff
import
AAApp
import
rdflib
class Conf:
    """Settings object passed to ``AAApp`` via its ``conf`` argument."""

    # Sniffer names; presumably resolved by aasniff -- confirm against its docs.
    SNIFFERS = ['HttpSniffer', 'HtmlSniffer']

    # Store description: engine identifier and database name.
    STORE = dict(ENGINE='sqlite', NAME='aasniff.sqlite')
class Command(BaseCommand):
    """Print the ``dcterms:title`` values found in the ``AAApp`` graph."""

    args = ''
    # Previously 'Indexes pages' (copied from the indexing command); this
    # command only reads the graph and prints titles.
    help = 'Shows the titles stored in the RDF graph'

    def handle(self, *args, **options):
        """Query the graph for subject/title pairs and print each title."""
        app = AAApp(conf=Conf)

        # Predicate to look up; injected into the query through initBindings.
        title_predicate = rdflib.URIRef("http://purl.org/dc/terms/title")

        # Prefixes made available to the SPARQL query.
        namespaces = {
            'bibo': rdflib.Namespace("http://purl.org/ontology/bibo/"),
            'bib': rdflib.Namespace("http://purl.org/net/biblio#"),
            'dc': rdflib.namespace.DC,
            'dcterms': rdflib.namespace.DCTERMS,
            'egr2': rdflib.Namespace("http://rdvocab.info/ElementsGr2/"),
            'foaf': rdflib.namespace.FOAF,
            'frbr': rdflib.Namespace("http://purl.org/vocab/frbr/core#"),
            'purl': rdflib.Namespace("http://purl.org/dc/terms/"),
            'rdf': rdflib.RDF,
            'rdvocab': rdflib.Namespace("http://RDVocab.info/elements/"),
            'stats': rdflib.Namespace("http://kavan.land/vocab/stats#"),
            'vocab': rdflib.Namespace("http://purl.org/vocab/frbr/core#"),
            'wemi': rdflib.Namespace("http://RDVocab.info/RDARelationshipsWEMI/"),
            'z': rdflib.Namespace("http://www.zotero.org/namespaces/export#"),
        }

        # The pattern uses ?predicate so the initBindings entry actually
        # constrains the match; before, the predicate was hard-coded in the
        # query text and the binding was dead.
        rows = app.graph.query(
            """
            SELECT DISTINCT ?subject ?title
            WHERE {
                ?subject ?predicate ?title.
            }
            """,
            initBindings={'predicate': title_predicate},
            initNs=namespaces,
        )

        print(rows)
        for row in rows:
            # Row columns follow the SELECT order: [0] subject, [1] title.
            print(row[1])
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment