Commit 711afccc authored by Michael Murtaugh's avatar Michael Murtaugh
Browse files

calibre add

parent edfe71ee
# https://manual.calibre-ebook.com/db_api.html
# run with calibre-debug command installed by calibre
#
from calibre.library import db
import json
from pathlib import Path
from calibre.ebooks.metadata.book.base import Metadata, SIMPLE_GET
from xml.etree import ElementTree as ET
import html5lib
def innerHTML (elt):
    """Return the markup contained inside *elt* as a unicode string.

    The result is the element's own leading text (an empty string when
    ``elt.text`` is None) followed by every direct child serialised with
    ``ET.tostring(..., method="html")``; the tag of *elt* itself is not
    included.
    """
    # Identity test rather than ``!= None``: None is a singleton.
    leading = elt.text if elt.text is not None else ""
    return leading + "".join(
        ET.tostring(child, method="html", encoding="unicode") for child in elt
    )
"""
add_books
Add the specified books to the library. Books should be an iterable of 2-tuples, each 2-tuple of the form (mi, format_map) where mi is a Metadata object and format_map is a dictionary of the form {fmt: path_or_stream}, for example: {'EPUB': '/path/to/file.epub'}.
SIMPLE_GET:
frozenset({'manifest', 'rights', 'uuid', 'timestamp', 'publication_type', 'thumbnail', 'spine', 'series', 'lpath', 'formats', 'title_sort', 'title', 'guide', 'authors', 'rating', 'author_sort', 'cover_data', 'comments', 'languages', 'toc', 'tags', 'book_producer', 'author_sort_map', 'pubdate', 'last_modified', 'db_id', 'size', 'publisher', 'cover', 'series_index', 'user_categories', 'mime', 'author_link_map', 'device_collections', 'application_id', 'identifiers'})
"""
# print (SIMPLE_GET)
# Open the calibre library at ../calibre and keep its "new API" object.
# NOTE: this rebinds the imported ``db`` factory to the library instance.
db = db('../calibre').new_api

# Debugging aid: list what is already in the library.
# for book_id in db.all_book_ids():
#     print (book_id)
#     d = db.get_metadata(book_id)
#     print (d)

# Read the scraped Verlag records; only the list stored under "items" is used.
with open("verlag.json") as verlag_file:
    books_meta = json.load(verlag_file)['items']

# Downloads live in docs/<md5>/ (see the loop below, which joins book['md5']).
docs_path = Path("docs")
# Collects the (Metadata, format_map) tuples handed to db.add_books.
books_to_add = []
"""
authors:
- Ramírez, Catherine S.
date_added: '2007-10-01'
download: https://www.constantvzw.org/verlag/IMG/doc/Catherine_S_Ramirez.doc
download_status: 200
language: English
license: creativecommons.org/licenses/by-nc-sa/3.0/
projects:
- Stitch and Split
published_in:
- AS 178 Selves and Territories in Science Fiction
status: Selected text
subtitle: Humanism and Infrahumanism
themes:
- Science (-) Fiction
title: '"She Did Not Own Herself Any Longer". Slavery and the Promise of Humanism
in Octavia E. Butler’s Science Fiction'
type: text
url: https://www.constantvzw.org/verlag/spip.php?page=article&id_article=41&mot_filtre=4&id_lang=0&debut_source_material=0
year: '2004'
authors:
- Zummer, Thomas
date_added: '2007-10-01'
download: http://data.constantvzw.org/s-a-s/16_zummer.pdf
download_status: 404
language: English
license: creativecommons.org/licenses/by-nc-sa/3.0/
projects:
- Stitch and Split
status: Resource
themes:
- Body and technology
title: 'Arrestments: Corporeality and Mediation'
type: file
url: https://www.constantvzw.org/verlag/spip.php?page=article&id_article=38&mot_filtre=4&id_lang=0&debut_source_material=0
wayback: http://web.archive.org/web/20110814075540id_/http://data.constantvzw.org/s-a-s/16_zummer.pdf
wayback_view: http://web.archive.org/web/20110814075540/http://data.constantvzw.org/s-a-s/16_zummer.pdf
"""
# Language names as they appear in the scraped records, mapped to the English
# name that is stored on the calibre metadata.
LANGS = {
    'Español': "Spanish",
    'Nederlands': "Dutch",
    'Français': "French",
    'English': "English",
}
def unsplit_author_name (text):
    """Turn a "Last, First" author name into "First Last".

    Only the first comma is treated as the separator.  Names without a comma
    are returned unchanged.  Whitespace around either part is removed, so
    "Zummer, Thomas" becomes "Thomas Zummer" (not " Thomas Zummer").
    """
    if "," not in text:
        return text
    last, first = (part.strip() for part in text.split(",", 1))
    # The outer strip covers a missing first or last part ("Constant,").
    return f"{first} {last}".strip()
# Build one (Metadata, format_map) pair per Verlag record, then add them all
# to the calibre library in a single add_books call.
for book in books_meta:
    bookpath = docs_path / book['md5']
    authors = [unsplit_author_name(x) for x in book.get('authors', [])]

    # Every regular file in the record's download folder is offered as a format.
    if bookpath.exists():
        pubs = [x for x in bookpath.iterdir() if x.is_file()]
    else:
        pubs = []
    # print (bookpath, pubs)

    # Make the "format map" (format files as dict with TYPE as keys).
    # Path.suffix keeps the leading dot (".epub"); strip it so the keys have
    # the documented form ("EPUB", "PDF", ...) and the EPUB test below can
    # actually find an epub that is already present.
    formats = {p.suffix.lstrip(".").upper(): str(p) for p in pubs}
    if "EPUB" not in formats:
        formats["EPUB"] = str(docs_path / f"{book['md5']}.epub")

    # Make the metadata object
    md = Metadata(book['title'], authors)
    if 'year' in book:
        md.pubdate = f"{book['year']}"

    # TODO: cover images and the per-item meta.html description are not
    # imported yet (earlier experiments with both were commented out here).

    tags = ["Verlag"]
    if 'projects' in book:
        tags.extend(book['projects'])
    if 'themes' in book:
        tags.extend(book['themes'])
    if 'status' in book:
        tags.append(book['status'])
    if 'type' in book:
        tags.append(book['type'])
    # if 'download_status' in book:
    #     tags.append(f"{book['download_status']}")
    md.tags = tags

    # The comments field carries provenance links as a small HTML fragment.
    comments = ""
    comments += f"""<p class="verlag">Originally published on <a class="verlag" href="{book['url']}">Constant Verlag</a></p>"""
    if 'download' in book:
        comments += f"""<p class="download">Download link: <a class="download" href="{book['download']}">{book['download']}</a>"""
        if 'download_status' in book and book['download_status'] != 200:
            comments += """ <a title="The link may be broken">⚠️</a>"""
        # Closes the paragraph opened above, so it belongs to this branch.
        comments += """</p>"""
    if 'wayback' in book:
        comments += f"""<p class="wayback">An archival copy appears to be available: <a class="wayback" href="{book['wayback']}">Wayback machine on archive.org</a></p>"""
    if comments:
        md.comments = comments

    if 'language' in book:
        # Unknown language names are passed through unchanged.
        md.language = LANGS.get(book['language'], book['language'])

    books_to_add.append((md, formats))

results = db.add_books(books_to_add)
print ("RESULTS of add_books")
print (results)
"""
Title : Dune, Depolitization and Decolonizing the Future
Title sort : Salman Sayyid
Author(s) : Salman Sayyid [Sayyid]
Timestamp : 2022-01-22T17:36:24+00:00
Published : 0101-01-01T00:00:00+00:00
Comments : <div>
<p><a href="https://constantvzw.org/site/-Stitch-And-Split,74-.html">Stich and Split</a></p></div>
"""
......@@ -9,6 +9,10 @@
"\n",
"140 items\n",
"\n",
"TODO:\n",
"* Images in pages (need to localize for epub production?)\n",
"\n",
"\n",
"Sample item structure\n",
"\n",
"```yaml\n",
......@@ -2078,6 +2082,9 @@
" path = docs_path / item['md5']\n",
" path.mkdir(exist_ok=True)\n",
" filename = os.path.split(urlparse(url).path)[1]\n",
" base, suffix = os.path.splitext(filename)\n",
" if suffix.lower() == \".php\":\n",
" filename = base + \".html\"\n",
" wget(url, path / filename)\n",
"\n",
"for item in items['items']:\n",
......@@ -2093,35 +2100,41 @@
"id": "d1cb144a-9b0d-46d0-971c-b70140d4c258",
"metadata": {},
"source": [
"## Calibre"
"## Calibre\n",
"\n",
"eventually clear any previously generated library"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "582272f3-f04b-4b46-9c03-3793d9efc97f",
"execution_count": 79,
"id": "503027dd-aada-4a75-8d44-6eb60834e87e",
"metadata": {},
"outputs": [],
"source": [
"!rm -rf calibre"
]
},
{
"cell_type": "markdown",
"id": "4a37f258-6129-4b4f-b9bb-27b7d4e68c53",
"metadata": {},
"source": [
"calibre-debug runs a Python script in a Python session that has access to the full calibre API. The script below creates a new calibre library in a folder called calibre next to this working directory (../calibre)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"id": "2a1aede7-7d55-47de-88eb-5e29cd93439e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Traceback (most recent call last):\n",
" File \"runpy.py\", line 194, in _run_module_as_main\n",
" File \"runpy.py\", line 87, in _run_code\n",
" File \"site.py\", line 45, in <module>\n",
" File \"site.py\", line 41, in main\n",
" File \"calibre/debug.py\", line 336, in main\n",
" File \"calibre/debug.py\", line 243, in run_script\n",
" File \"polyglot/builtins.py\", line 110, in exec_path\n",
" File \"/home/murtaugh/projects/books/repos/verlag/calibre-add-books.py\", line 164, in <module>\n",
" results = db.add_books(books_to_add) \n",
" File \"calibre/db/cache.py\", line 1770, in add_books\n",
" File \"calibre/db/cache.py\", line 74, in call_func_with_lock\n",
" File \"calibre/db/cache.py\", line 1721, in create_book_entry\n",
" File \"calibre/db/cache.py\", line 1667, in author_sort_from_authors\n",
" File \"calibre/utils/icu.py\", line 180, in change_case\n",
"TypeError: Not a unicode string\n"
"RESULTS of add_books\n",
"([35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174], [])\n"
]
}
],
......@@ -2129,10 +2142,55 @@
"!calibre-debug calibre-add-books.py"
]
},
{
"cell_type": "markdown",
"id": "82983695-dfee-4dc9-a173-e74db686d098",
"metadata": {},
"source": [
"Run calibre with this folder as the library (Quit Calibre or use Kernel->Interrupt to stop)"
]
},
{
"cell_type": "code",
"execution_count": 83,
"id": "325c1abf-b73b-4877-8cc5-4d627061d6e5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using library at /home/murtaugh/projects/books/repos/calibre\n",
"Traceback (most recent call last):\n",
" File \"calibre/gui2/ui.py\", line 135, in __init__\n",
" File \"calibre/gui2/ui.py\", line 153, in init_iaction\n",
" File \"calibre/customize/__init__.py\", line 624, in load_actual_plugin\n",
" File \"importlib/__init__.py\", line 127, in import_module\n",
" File \"<frozen importlib._bootstrap>\", line 1014, in _gcd_import\n",
" File \"<frozen importlib._bootstrap>\", line 991, in _find_and_load\n",
" File \"<frozen importlib._bootstrap>\", line 975, in _find_and_load_unlocked\n",
" File \"<frozen importlib._bootstrap>\", line 671, in _load_unlocked\n",
" File \"calibre/customize/zipplugin.py\", line 191, in exec_module\n",
" File \"calibre_plugins.opds_client.ui\", line 10, in <module>\n",
" from calibre_plugins.opds_client.main import OpdsDialog\n",
" File \"calibre/customize/zipplugin.py\", line 191, in exec_module\n",
" File \"calibre_plugins.opds_client.main\", line 32, in <module>\n",
" from calibre_plugins.opds_client.model import OpdsBooksModel\n",
" File \"calibre/customize/zipplugin.py\", line 191, in exec_module\n",
" File \"calibre_plugins.opds_client.model\", line 11, in <module>\n",
" import urllib2\n",
"ModuleNotFoundError: No module named 'urllib2'\n"
]
}
],
"source": [
"!calibre --with-library=../calibre"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d5ee2ca-0bc8-49e4-bff9-c57595fc863e",
"id": "fad39d73-f85f-4639-8705-dba0e374846f",
"metadata": {},
"outputs": [],
"source": []
......
%% Cell type:markdown id:4d63a8f5-868d-4a53-b25e-4167db56b9d2 tags:
# Scraping Verlag
140 items
TODO:
* Images in pages (need to localize for epub production?)
Sample item structure
```yaml
authors:
- Ramírez, Catherine S.
date_added: '2007-10-01'
download: https://www.constantvzw.org/verlag/IMG/doc/Catherine_S_Ramirez.doc
download_status: 200
language: English
license: creativecommons.org/licenses/by-nc-sa/3.0/
projects:
- Stitch and Split
published_in:
- AS 178 Selves and Territories in Science Fiction
status: Selected text
subtitle: Humanism and Infrahumanism
themes:
- Science (-) Fiction
title: '"She Did Not Own Herself Any Longer". Slavery and the Promise of Humanism
in Octavia E. Butler’s Science Fiction'
type: text
url: https://www.constantvzw.org/verlag/spip.php?page=article&id_article=41&mot_filtre=4&id_lang=0&debut_source_material=0
year: '2004'
```
Another example (with a [wayback link (with wayback frame)](http://web.archive.org/web/20110814075540/http://data.constantvzw.org/s-a-s/16_zummer.pdf)... [and direct pdf](http://web.archive.org/web/20110814075540id_/http://data.constantvzw.org/s-a-s/16_zummer.pdf)):
```yaml
authors:
- Zummer, Thomas
date_added: '2007-10-01'
download: http://data.constantvzw.org/s-a-s/16_zummer.pdf
download_status: 404
language: English
license: creativecommons.org/licenses/by-nc-sa/3.0/
projects:
- Stitch and Split
status: Resource
themes:
- Body and technology
title: 'Arrestments: Corporeality and Mediation'
type: file
url: https://www.constantvzw.org/verlag/spip.php?page=article&id_article=38&mot_filtre=4&id_lang=0&debut_source_material=0
wayback: http://web.archive.org/web/20110814075540id_/http://data.constantvzw.org/s-a-s/16_zummer.pdf
wayback_view: http://web.archive.org/web/20110814075540/http://data.constantvzw.org/s-a-s/16_zummer.pdf
```
%% Cell type:code id:a78ce119-7ec8-4e0f-a1a1-6e2d8e2e76f3 tags:
``` python
from urllib.request import urlopen
import html5lib
import csv
from urllib.parse import urljoin
from xml.etree import ElementTree as ET
import json
import re
```
%% Cell type:code id:57fd77da-3972-492f-b32f-f2f0e1f539c2 tags:
``` python
def scrape_items(url):
    """Collect the item links found on a listing page.

    Fetches *url*, parses it with html5lib and returns a dict of the form
    ``{'items': [{'url': <absolute href>, 'type': <class>}, ...]}``.  One
    entry is produced, in document order, for each distinct link whose class
    attribute is exactly "text", "publication" or "file".
    """
    # Close the HTTP response as soon as the body has been read.
    with urlopen(url) as response:
        tree = html5lib.parse(response.read(), namespaceHTMLElements=False)

    items = []
    seen = set()
    for a in tree.findall(".//a"):
        klass = a.attrib.get("class")
        href = a.attrib.get("href")
        # Without an href, urljoin() would hand back the listing URL itself
        # and record the listing page as an item.
        if klass not in ("text", "publication", "file") or not href:
            continue
        href = urljoin(url, href)
        if href in seen:
            continue
        seen.add(href)
        items.append({'url': href, 'type': klass})
    return {'items': items}
```
%% Cell type:code id:1993ecc7-76fb-430b-948f-69fd8adfa6ad tags:
``` python
# Nothing loaded yet: later cells either scrape the items or read verlag.json.
items = None
```
%% Cell type:code id:d6dc690c-67fe-405b-a1ef-c63ab78731d9 tags:
``` python
# items = scrape_items("https://www.constantvzw.org/verlag/")
```
%% Cell type:code id:a4e85903-2a82-4774-90e3-ebd28b8459dd tags:
``` python
# Nothing scraped in this session: fall back to the saved verlag.json.
if not items:
    with open("verlag.json") as saved:
        items = json.load(saved)
```
%% Cell type:code id:05d90b98-f6cf-4e52-8f39-edd41fbd4de8 tags:
``` python
# with open("verlag.csv", "w") as fout:
# csvout = csv.writer(fout)
# csvout.writerow(("class", "href"))
# for a in t.findall(".//a"):
# csvout.writerow((a.attrib.get("class"), urljoin(url, a.attrib.get("href"))))
```
%% Cell type:code id:26755ab3-b6d3-4dbe-854e-3f179036389c tags:
``` python
# Rules applied by scrape_info to each label of an item's info list.
# Each row is (preferred key or None to keep the label,
#              regex tested against the lower-cased label,
#              regex used to split the value into a list, or None).
_LIST_SEPARATOR = r'\s*,\s*'
INFO_KEYS = (
    (
        'projects',
        r'^this (?:selected text|project|resource|publication) belongs to the following projects?$',
        _LIST_SEPARATOR,
    ),
    (
        'published_in',
        r'^this (?:selected text|project|resource|publication) has been published in$',
        _LIST_SEPARATOR,
    ),
    (
        'themes',
        r'^this (?:selected text|project|resource|publication) is related to the following themes?$',
        _LIST_SEPARATOR,
    ),
    (None, r'^licenses$', _LIST_SEPARATOR),
    ('date_added', r'^date added$', None),
)
```
%% Cell type:code id:b1d343f2-527e-4b04-850b-5ecabf2e4588 tags:
``` python
def scrape_info (info_div):
    """Extract (name, value) pairs from the <li> rows of an info block.

    For every ``<li>`` under *info_div*, the text of its
    ``<span class="label">`` (lower-cased, trailing ":" removed) is the name
    and the text following the span is the value.  The first INFO_KEYS rule
    whose pattern matches the name may rename it and/or split the value into
    a list.  A "date_added" value is cut down to its first
    whitespace-separated token.  Rows with an empty value are dropped.

    Returns a list of (name, value) tuples, where value is a str or a list
    of str.
    """
    ret = []
    for li in info_div.findall(".//li"):
        label = li.find("span[@class='label']")
        if label is None:
            continue  # no label span: nothing to key the value on
        # .text / .tail are None when the span or the rest of the row is empty
        name = (label.text or "").strip().lower().rstrip(":")
        value = (label.tail or "").strip()
        for pref_name, pattern, split_pattern in INFO_KEYS:
            if re.search(pattern, name):
                if pref_name:
                    name = pref_name
                if split_pattern:
                    # Drop the empty pieces a leading or trailing separator
                    # leaves behind (e.g. "a, b," -> ['a', 'b', '']).
                    value = [v for v in re.split(split_pattern, value) if v]
                # One rule per label; a second match would try to re.split a list.
                break
        if name == "date_added" and value:
            value = value.split()[0]  # just take the date (no time)
        if value:
            ret.append((name, value))
    return ret
```
%% Cell type:code id:e49502e4-e1e0-4c8a-bf26-6561f289d9a0 tags:
``` python
def scrape_item (url, d=None):
    """Scrape one item page into a dict.

    Fetches *url*, parses it with html5lib and fills *d* (a new dict when
    None) with:

    * 'title'    - text of the first <h2> (required)
    * 'subtitle' - text of the first <h3>, when there is one
    * 'authors'  - the first <h4> split on "·", when there is one
    * 'download' - absolute href of the link inside <div class="download">
    * whatever pairs scrape_info returns for <div class="info">

    Returns *d*; a dict passed in is updated in place.
    """
    print (f"scrape_item {url}")
    # Close the HTTP response as soon as the body has been read.
    with urlopen(url) as response:
        t = html5lib.parse(response.read(), namespaceHTMLElements=False)
    if d is None:
        d = {}
    d['title'] = t.find(".//h2").text
    # subtitle (optional: find() gives None, hence the AttributeError)
    try:
        d['subtitle'] = t.find(".//h3").text
    except AttributeError:
        pass
    # authors (optional), separated by a middle dot on the page
    try:
        authors = t.find(".//h4").text.strip()
        d['authors'] = [x.strip() for x in authors.split("·")]
    except AttributeError:
        pass
    # Compare against None: the truth value of an Element reflects whether it
    # has children, not whether find() located it.
    dwld = t.find(".//div[@class='download']")
    link = dwld.find(".//a") if dwld is not None else None
    href = link.attrib.get("href") if link is not None else None
    if href:
        d['download'] = urljoin(url, href)
    info_div = t.find(".//div[@class='info']")
    info = scrape_info(info_div) if info_div is not None else []
    print ("info", info)
    d.update(info)
    return d
```
%% Cell type:code id:f4263cf8-947a-4ebf-9374-19996fe811a1 tags:
``` python
#item_url = "https://www.constantvzw.org/verlag/spip.php?page=article&id_article=20&mot_filtre=5&id_lang=0&debut_source_material=0"
#print (json.dumps(scrape_item(item_url), indent=2))
```
%% Cell type:code id:11ba8c21-0aca-4bbc-a726-aa54e2501c45 tags:
``` python
def scrape_each_item(items):
    """Scrape the page of every entry in ``items['items']``.

    Each entry dict is handed to scrape_item together with its 'url', so the
    entries are filled in place; nothing is returned.
    """
    for entry in items['items']:
        scrape_item(entry['url'], d=entry)
```
%% Cell type:code id:65225861-80b8-44e9-90e3-d22e9f448784 tags:
``` python
def save_to_json(path, data):
    """Write *data* to the file at *path* as indented JSON plus a newline.

    Both arguments are honoured: the output goes to *path* (not to a fixed
    file name) and it is *data* that gets serialised (not a global).
    """
    with open(path, "w") as fout:
        print (json.dumps(data, indent=2), file=fout)
```
%% Cell type:markdown id:adeb3cad-70da-46f6-969e-a7e52cdadf1d tags:
## Check HTTP status / wayback
%% Cell type:code id:2d754edf-22e4-4537-9f61-ba0c61b19960 tags:
``` python
import urllib
import ssl
import wayback
```
%% Cell type:code id:fd6da321-4910-4afa-8f95-7482385c2993 tags:
``` python
def test_status(url, timeout=None):
    """Request *url* and return the resulting HTTP status code.

    Returns the response's ``code`` on success.  When the request raises,
    returns the exception's ``code`` attribute if it has one, else 0.
    *timeout* is passed straight to urlopen.
    """
    print (f"Checking status {url}")
    try:
        # Only the status is needed, so release the connection right away.
        with urlopen(url, data=None, timeout=timeout) as response:
            return response.code
    except Exception as e:
        # Deliberately broad: any failure is reported as a status value
        # instead of interrupting a run over many URLs.
        return getattr(e, "code", 0)
```
%% Cell type:code id:8c09669a-d39d-4fa8-9227-ccbddd8c2ba7 tags:
``` python
def test_url_statuses(items, timeout=10):
    """Check the download link of every entry and group entries by status.

    Entries of ``items['items']`` with a non-empty 'download' get their
    'download_status' set to the value returned by test_status.  Returns a
    dict mapping each status to the list of entry 'url' values that
    produced it.
    """
    codes_by_status = {}
    for entry in items['items']:
        download_url = entry.get('download')
        if not download_url:
            continue
        status = test_status(download_url, timeout=timeout)
        entry['download_status'] = status
        codes_by_status.setdefault(status, []).append(entry['url'])
    return codes_by_status
```
%% Cell type:code id:674fac90-e28f-4e02-9fc1-ca055908c4b5 tags:
``` python
#!pip3 install wayback
```
%% Cell type:code id:b98f3d52-a79f-4266-b4e6-0e3c1923005b tags:
``` python
#wbc = wayback.WaybackClient()
# for result in wbc.search("http://www.constantvzw.com/copy.cult/texts/reg_liberty1.html"):
# print (result)
# result.raw_url
```
%% Cell type:markdown id:dbf0b7a4-3df6-450b-962d-2d72bc5146ad tags:
Nice! The raw_url gives a direct, interface-free link (though the wayback interface can be handy for navigating between different snapshots)...
next step... Making an editable document version of ALL the data ... how manageable would/could such a document be, could/should it be broken into separate lists based on type / project ?
%% Cell type:code id:50c38209-4fe7-4c5b-b0b6-a023051ff163 tags:
``` python
def check_wayback(items):
    """Look up an archived copy for every download that did not return 200.

    For each entry of ``items['items']`` that has a 'download_status' other
    than 200, the first result of a wayback search for its 'download' URL
    (if any) is stored on the entry as 'wayback' (raw_url) and
    'wayback_view' (view_url).
    """
    wbc = wayback.WaybackClient()
    for entry in items['items']:
        # Entries that were never checked, or that answered 200, are left alone.
        if entry.get('download_status', 200) == 200:
            continue
        print(f"Checking wayback for {entry['download']}")
        first = next(iter(wbc.search(entry['download'])), None)
        if first is not None:
            entry['wayback'] = first.raw_url
            entry['wayback_view'] = first.view_url
```
%% Cell type:markdown id:9a231599-ab6c-4dd5-9ba1-528f87c30dca tags:
## YAML?!
What can these document / list / spreadsheet forms look like in a collectively editable version? (etherpad, ethercalc, hackpad, ...) **or** is JSON the format already (with json-ld we **HAVE** the link to RDF), what does a YAML representation of the JSON look like, are there shared JSON editors ?! (or do we enter Google Wave territory here)
%% Cell type:code id:41c7044c-ea75-4204-9f97-59ef2e6f5918 tags:
``` python
import yaml
```
%% Cell type:code id:2fc93f6f-137e-4d91-ab44-741992ffb819 tags:
``` python
# print (yaml.dump(items['items']))
```
%% Cell type:markdown id:c837c0e4-c142-4403-8729-0b1a8256e602 tags:
This is quite readable (apart from the unicode stuff --- which I'm assuming there must be a way around)... But the quantity is large, and the division between items not nice (separate streams for each would be nicer)... But it's better than a markdown table or definition list. Being able to specify an order to the fields would also be very useful for sure (or to start, alphabetizing them).
%% Cell type:code id:288bb024-c1f5-421c-b91c-9368b7825ff7 tags:
``` python
# print (yaml.dump_all(items['items'], allow_unicode=True))
```
%% Cell type:code id:25d49c40-eb93-4fe6-98a5-4dc250b22469 tags:
``` python
def save_to_yaml(path, data):
    """Write *data* to *path* with ``yaml.dump_all(..., allow_unicode=True)``.

    *data* is passed to dump_all as is, so it should be an iterable of
    documents (for example ``items['items']``).
    """
    # allow_unicode=True emits non-ASCII characters verbatim, so the file
    # must be opened as UTF-8 instead of the locale's default encoding.
    with open(path, "w", encoding="utf-8") as fout:
        print (yaml.dump_all(data, allow_unicode=True), file=fout)
```
%% Cell type:code id:5681a408-9bee-47fc-a451-f66417f8448a tags:
``` python
##titems = scrape_items("https://www.constantvzw.org/verlag/spip.php?page=list&mot_filtre=8&lang_id=0&go=select")
## titems = scrape_items("https://www.constantvzw.org/verlag/spip.php?page=list&mot_filtre=9&lang_id=0&go=select")
#titems = scrape_items("https://www.constantvzw.org/verlag/spip.php?page=list&mot_filtre=4&lang_id=0")
#print (f"{len(titems['items'])} items")
```
%% Cell type:code id:542f5d61-a9d7-42fe-a971-1d2b91d95651 tags:
``` python
# Don't verify TLS certificates: replace the factory used to build the default
# HTTPS context with the non-verifying one. This is a process-wide change that
# affects every HTTPS request made after this cell runs.
# NOTE(review): presumably needed because the scraped site's certificate fails
# verification -- confirm. It also removes protection against tampered
# responses, so keep it confined to this notebook.
ssl._create_default_https_context = ssl._create_unverified_context
```
%% Cell type:code id:7f1331f4-4f47-4682-927d-615760d6b632 tags:
``` python
# by_language: fetch the listing page, report how many items it holds,
# then enrich every item and persist the result as JSON.
listing_url = "https://www.constantvzw.org/verlag/spip.php?page=list&mot_filtre=4&lang_id=0&go=select"
items = scrape_items(listing_url)
print(f"{len(items['items'])} items")
# NOTE(review): scrape_each_item is defined elsewhere in the notebook; the
# recorded output shows one "scrape_item <url>" line per article -- it
# presumably updates `items` in place, confirm before reordering.
scrape_each_item(items)
save_to_json("verlag.json", items)
```
%%%% Output: stream
140 items
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=12&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('year', '1999'), ('date_added', '2007-04-27'), ('license', 'Copyright'), ('projects', ['Cyberfeminist working days', 'Digitales']), ('themes', ['(Cyber)feminism', 'Body and technology'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=46&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2005'), ('date_added', '2007-10-01'), ('license', 'Creative Commons Attribution-NoDerivs'), ('projects', ['Stitch and Split']), ('published_in', ['AS 178 Selves and Territories in Science Fiction']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=31&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'Español'), ('date_added', '2007-10-01'), ('license', 'Creative Commons Attribution-NoDerivs'), ('projects', ['Stitch and Split']), ('published_in', ['Suturas & Fragmentos']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=109&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('languages', 'English, Français, Nederlands'), ('year', '2009'), ('date_added', '2010-02-17'), ('licenses', ['Creative Commons Attribution-NoDerivs', 'creativecommons.org/licenses/by-nc/3.0/', 'creativecommons.org/licenses/by-nc-sa/3.0/', '']), ('projects', ['Verbindingen / Jonctions 10']), ('themes', ['Body and technology', 'Free Software', 'free culture', 'Performativity', 'Report'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=105&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('languages', 'English, Français, Nederlands'), ('year', '2004'), ('date_added', '2009-07-13'), ('licenses', ['Creative Commons Attribution-NoDerivs', 'creativecommons.org/licenses/by-nc/3.0/', 'creativecommons.org/licenses/by-nc-sa/3.0/', 'Creative Commons Public Domain', '']), ('projects', ['Verbindingen / Jonctions 7']), ('themes', ['Performativity', 'Work'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=94&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'English'), ('date_added', '2007-11-02'), ('license', 'creativecommons.org/licenses/by-nc/3.0/'), ('projects', ['Verbindingen / Jonctions 7']), ('published_in', ['Verbindingen / Jonctions 7']), ('themes', ['Performativity'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=93&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('date_added', '2007-11-02'), ('license', 'creativecommons.org/licenses/by-nc/3.0/'), ('projects', ['Verbindingen / Jonctions 7']), ('published_in', ['Verbindingen / Jonctions 7']), ('themes', ['Language', 'Performativity'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=44&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('year', '2002'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['AS 178 Selves and Territories in Science Fiction']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=41&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2004'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['AS 178 Selves and Territories in Science Fiction']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=33&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Español'), ('year', '2006'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=47&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['AS 178 Selves and Territories in Science Fiction']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=38&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'English'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('themes', ['Body and technology'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=39&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['AS 178 Selves and Territories in Science Fiction']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=108&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'Nederlands'), ('year', '2008'), ('date_added', '2010-02-17'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('themes', ['Body and technology'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=25&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'English'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['Suturas & Fragmentos']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=32&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['Suturas & Fragmentos']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=35&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'Español'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=49&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['AS 178 Selves and Territories in Science Fiction']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=50&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2006'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['AS 178 Selves and Territories in Science Fiction']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=29&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'Español'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['Suturas & Fragmentos']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=30&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'English'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['Suturas & Fragmentos']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=137&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('languages', 'English, Español'), ('year', '2013'), ('date_added', '2013-05-16'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['The Libre Graphics Research Unit is a traveling lab where new ideas for creative tools are developed. The Research Unit is an initiative of four European media-labs actively engaged in Free/Libre and Open Source Software and Free Culture. This cross-disciplinary project involves artists', 'designers and programmers and is developed in dialogue with the Libre Graphics community.']), ('themes', ['Body and technology', 'Digital tools', 'Free Software', 'free culture', 'Report'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=66&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2002'), ('date_added', '2007-10-02'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Verbindingen / Jonctions 6']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=24&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'Español'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=112&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2008'), ('date_added', '2010-10-10'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('themes', ['Body and technology'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=151&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2017'), ('date_added', '2017-02-07'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('themes', ['Body and technology', 'Digital tools', 'Language'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=37&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'Español'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=43&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['AS 178 Selves and Territories in Science Fiction']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=28&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=111&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2010'), ('date_added', '2010-10-10'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('themes', ['Body and technology'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=42&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['AS 178 Selves and Territories in Science Fiction']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=48&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['AS 178 Selves and Territories in Science Fiction']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=27&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'English'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=45&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2000'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('published_in', ['AS 178 Selves and Territories in Science Fiction']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=34&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'Español'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=26&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'English'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('themes', ['Body and technology', 'Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=36&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'Español'), ('date_added', '2007-10-01'), ('license', 'creativecommons.org/licenses/by-nc-sa/3.0/'), ('projects', ['Stitch and Split']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=118&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'Français'), ('year', '2009'), ('date_added', '2011-10-31'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=119&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2011-10-31'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=120&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2011-11-01'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=121&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2011-11-01'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=122&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2011-11-01'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=123&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2011-11-01'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=124&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'Nederlands'), ('year', '2009'), ('date_added', '2011-11-01'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=125&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2011-11-01'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=127&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2011-11-08'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=126&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2011'), ('date_added', '2011-11-06'), ('projects', ['!Co LAPse KoDe']), ('themes', ['Body and technology', 'Notation', 'Performativity', 'Report'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=128&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'Français'), ('year', '2009'), ('date_added', '2011-11-08'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Theater'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=129&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2011-11-08'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Theater'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=130&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2011-11-08'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=131&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2011-11-08'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=132&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2011-11-08'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Theater'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=139&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'English'), ('year', '2013'), ('date_added', '2013-05-16'), ('projects', ['The Libre Graphics Research Unit is a traveling lab where new ideas for creative tools are developed. The Research Unit is an initiative of four European media-labs actively engaged in Free/Libre and Open Source Software and Free Culture. This cross-disciplinary project involves artists', 'designers and programmers and is developed in dialogue with the Libre Graphics community.']), ('themes', ['Digital tools'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=134&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('languages', 'English, Français, Nederlands'), ('year', '2011'), ('date_added', '2011-11-21'), ('projects', ['Verbindingen / Jonctions 12']), ('themes', ['Fiction', 'Free Software', 'free culture', 'Performativity', 'Report', 'Theater'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=69&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2003'), ('date_added', '2007-10-02'), ('projects', ['Transmedia Workshop']), ('themes', ['Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=114&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('year', '2011'), ('date_added', '2011-06-20'), ('projects', ['adashboard (for fiction)']), ('themes', ['Brussels', 'Fiction', 'Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=104&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'Français'), ('year', '2005'), ('date_added', '2009-07-10'), ('themes', ['Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=145&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2015'), ('date_added', '2015-09-24'), ('themes', ['Data practice', 'Digital tools', 'Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=140&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2013'), ('date_added', '2013-05-16'), ('projects', ['Active Archives']), ('themes', ['Data practice', 'Digital tools'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=101&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2008'), ('date_added', '2009-07-10'), ('published_in', ['Designing Universal Knowledge']), ('themes', ['Design and typography', 'Digital tools'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=135&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2012'), ('date_added', '2012-01-24'), ('projects', ['Ellentriek']), ('themes', ['Digital tools', 'Performativity'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=18&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('date_added', '2007-10-01'), ('projects', ['']), ('themes', ['Free Software', 'free culture', 'Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=136&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'Nederlands'), ('year', '2012'), ('date_added', '2014-01-19'), ('themes', ['Digital tools', 'Fiction', 'Report', 'Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=97&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'English'), ('year', '2009'), ('date_added', '2009-07-10'), ('published_in', ['FLOSS+Art']), ('themes', ['Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=158&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2016'), ('date_added', '2017-03-29'), ('themes', ['Digital tools', 'Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=159&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2016'), ('date_added', '2017-03-29'), ('themes', ['Digital tools', 'Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=152&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2017'), ('date_added', '2017-02-07'), ('themes', ['Body and technology', 'Infrastructure'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=146&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2015'), ('date_added', '2015-09-24'), ('themes', ['(Cyber)feminism', 'Body and technology', 'Report'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=110&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2010'), ('date_added', '2010-09-07'), ('projects', ['LABtoLAB']), ('themes', ['Digital tools', 'Free Software', 'free culture', 'Work'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=17&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('date_added', '2007-10-01'), ('projects', ['']), ('themes', ['Free Software', 'free culture', 'Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=156&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2017'), ('date_added', '2017-03-14'), ('themes', ['Data practice'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=67&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'English'), ('date_added', '2007-10-02'), ('projects', ['2005']), ('published_in', ['The Language of Sharing']), ('themes', ['Language'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=51&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'English'), ('date_added', '2007-10-01'), ('projects', ['']), ('themes', ['Design and typography', 'Digital tools', 'Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=150&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('languages', 'English, Français, Nederlands'), ('year', '2016'), ('date_added', '2016-03-15'), ('projects', ['Parlez-vous Saint-Gillois']), ('themes', ['Brussels', 'Language', 'Notation', 'Participatory art'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=23&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2006'), ('date_added', '2007-10-01'), ('projects', ['']), ('themes', ['Body and technology', 'Design and typography', 'Digital tools', 'Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=40&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('date_added', '2007-10-01'), ('projects', ['Stitch and Split']), ('published_in', ['AS 178 Selves and Territories in Science Fiction']), ('themes', ['Science (-) Fiction'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=100&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('languages', 'English, Français, Nederlands'), ('year', '2005'), ('date_added', '2009-07-10'), ('projects', ['2005']), ('themes', ['Free Software', 'free culture', 'Language'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=144&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2013'), ('date_added', '2014-01-19'), ('projects', ['Verbindingen-Jonctions 14']), ('themes', ['Data practice', 'Digital tools', 'Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=133&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'Français'), ('year', '2011'), ('date_added', '2011-11-10'), ('themes', ['Brussels', 'Digital tools', 'Media labs'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=141&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'Français'), ('year', '2013'), ('date_added', '2013-04-16'), ('license', 'GPLv2'), ('projects', ['Manuals that have been co/written or edited by Constant members and have been produced / published by FLOSS Manuals publishers', 'Open Source Video']), ('themes', ['Digital tools', 'Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=143&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'Français'), ('year', '2012'), ('date_added', '2013-05-16'), ('license', 'GPLv2'), ('projects', ['Manuals that have been co/written or edited by Constant members and have been produced / published by FLOSS Manuals publishers']), ('themes', ['Design and typography', 'Digital tools', 'Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=116&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2010'), ('date_added', '2011-07-04'), ('license', 'Not specified'), ('projects', ['Towards']), ('published_in', ['Mapping in urban design Part 3/Paper 2']), ('themes', ['Cartography', 'Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=115&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2010'), ('date_added', '2011-07-04'), ('license', 'Not specified'), ('projects', ['Towards']), ('published_in', ['Mapping in urban design Part 3/Paper 2']), ('themes', ['Cartography', 'Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=9&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2000'), ('date_added', '2007-04-25'), ('license', 'Not specified'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=89&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2003'), ('date_added', '2007-10-22'), ('projects', ['Digitales']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=72&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2006'), ('date_added', '2007-10-02'), ('projects', ['']), ('themes', ['Body and technology'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=75&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '1999'), ('date_added', '2007-10-02'), ('projects', ['Modestwitness']), ('themes', ['Report'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=64&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'English'), ('date_added', '2007-10-02'), ('projects', ['Verbindingen / Jonctions 6'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=81&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('date_added', '2007-10-05'), ('projects', ['']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=82&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2006'), ('date_added', '2007-10-05'), ('projects', ['']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=87&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2001'), ('date_added', '2007-10-22'), ('projects', ['Digitales']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=52&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2001'), ('date_added', '2007-10-01'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=68&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('date_added', '2007-10-02'), ('projects', ['Transmedia Workshop']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=138&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('languages', 'English, Español, Français'), ('year', '2013'), ('date_added', '2013-05-16'), ('projects', ['The Libre Graphics Research Unit is a traveling lab where new ideas for creative tools are developed. The Research Unit is an initiative of four European media-labs actively engaged in Free/Libre and Open Source Software and Free Culture. This cross-disciplinary project involves artists', 'designers and programmers and is developed in dialogue with the Libre Graphics community.']), ('themes', ['Digital tools'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=21&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2000'), ('date_added', '2007-10-01'), ('projects', ['']), ('themes', ['Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=99&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'Nederlands'), ('year', '2008'), ('date_added', '2009-07-10'), ('projects', ['']), ('themes', ['Design and typography', 'Digital tools', 'Free Software', 'free culture', 'Work'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=103&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2009-07-10'), ('themes', ['Design and typography'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=96&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2009'), ('date_added', '2009-07-10'), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=95&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Publication'), ('language', 'English'), ('year', '2008'), ('date_added', '2009-07-10'), ('projects', ['Open Source Video']), ('themes', ['Digital tools', 'Free Software', 'free culture', 'Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=19&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('date_added', '2007-10-01'), ('projects', ['']), ('themes', ['Free Software', 'free culture', 'Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=14&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('date_added', '2007-04-27'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=58&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('year', '2000'), ('date_added', '2007-10-01'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=53&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '1997'), ('date_added', '2007-10-01'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=86&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2001'), ('date_added', '2007-10-22'), ('projects', ['Digitales']), ('themes', ['(Cyber)feminism', 'Digital tools'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=57&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('year', '2001'), ('date_added', '2007-10-01'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=71&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2004'), ('date_added', '2007-10-02'), ('projects', ['Transmedia Workshop']), ('themes', ['Free Software', 'free culture'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=5&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('date_added', '2007-04-25'), ('projects', ['The Flesh Machine']), ('themes', ['Body and technology'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=98&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'Nederlands'), ('date_added', '2009-07-10'), ('published_in', ['CROSS-over']), ('themes', ['Digital tools', 'Free Software', 'free culture', 'Work'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=88&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('date_added', '2007-10-22'), ('projects', ['Digitales']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=77&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('date_added', '2007-10-02'), ('projects', ['Modestwitness']), ('themes', ['Report'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=76&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('date_added', '2007-10-02'), ('projects', ['Modestwitness']), ('themes', ['Report'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=78&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('date_added', '2007-10-02'), ('projects', ['Modestwitness']), ('themes', ['Body and technology', 'Digital tools'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=70&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2004'), ('date_added', '2007-10-02'), ('projects', ['Transmedia Workshop']), ('themes', ['Digital tools'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=10&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('date_added', '2007-04-27'), ('projects', ['Digitales']), ('themes', ['Work'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=55&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('date_added', '2007-10-01'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=11&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('date_added', '2007-04-27'), ('projects', ['Cyberfeminist working days', 'Digitales']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=85&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('date_added', '2007-10-22'), ('projects', ['Digitales']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=62&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'Français'), ('date_added', '2007-10-02'), ('projects', ['']), ('themes', ['Free Software', 'free culture', 'Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=7&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '1998'), ('date_added', '2007-04-25'), ('projects', ['Museums and New Media']), ('themes', ['Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=3&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('year', '1996'), ('date_added', '2007-04-25'), ('projects', ['Verbindingen / Jonctions 1']), ('themes', ['Body and technology'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=4&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '1996'), ('date_added', '2007-04-25'), ('projects', ['Verbindingen / Jonctions 1']), ('themes', ['Body and technology'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=54&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2001'), ('date_added', '2007-10-01'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=59&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('year', '2000'), ('date_added', '2007-10-01'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=83&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('date_added', '2007-10-05'), ('projects', ['']), ('themes', ['Free Software', 'free culture', 'Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=74&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '1999'), ('date_added', '2007-10-02'), ('projects', ['Modestwitness']), ('themes', ['Body and technology', 'Language'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=73&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('year', '1999'), ('date_added', '2007-10-02'), ('projects', ['Modestwitness']), ('themes', ['Body and technology', 'Language'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=84&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('date_added', '2007-10-22'), ('projects', ['Digitales']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=90&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2006'), ('date_added', '2007-10-22'), ('projects', ['Digitales']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=22&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Deutch'), ('date_added', '2007-10-01'), ('projects', ['']), ('themes', ['Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=79&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2000'), ('date_added', '2007-10-02'), ('projects', ['Modestwitness']), ('themes', ['Body and technology', 'Language'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=60&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('year', '2001'), ('date_added', '2007-10-01'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=6&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '1996'), ('date_added', '2007-04-25'), ('projects', ['The Flesh Machine']), ('themes', ['Body and technology'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=20&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2002'), ('date_added', '2007-10-01'), ('projects', ['']), ('themes', ['Digital tools', 'Free Software', 'free culture', 'Rethinking the author'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=63&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Resource'), ('language', 'English'), ('date_added', '2007-10-02'), ('projects', ['Verbindingen / Jonctions 3']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=56&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2001'), ('date_added', '2007-10-01'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=61&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2001'), ('date_added', '2007-10-02'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=8&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Français'), ('year', '2002'), ('date_added', '2007-04-25'), ('projects', ['Cyberfeminist working days']), ('themes', ['(Cyber)feminism'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=16&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2007'), ('date_added', '2007-04-27'), ('projects', ['Cyberfeminist working days', 'Digitales']), ('themes', ['(Cyber)feminism', 'Digital tools'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=65&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'English'), ('year', '2002'), ('date_added', '2007-10-02'), ('projects', ['Verbindingen / Jonctions 6']), ('themes', ['(Cyber)feminism', 'Body and technology'])]
scrape_item https://www.constantvzw.org/verlag/spip.php?page=article&id_article=91&mot_filtre=4&id_lang=0&debut_source_material=0
info [('status', 'Selected text'), ('language', 'Nederlands'), ('year', '2000'), ('date_added', '2007-10-22'), ('projects', ['Digitales']), ('themes', ['(Cyber)feminism', 'Body and technology'])]
%% Cell type:code id:a28ac329-aa37-4dbd-a884-fccd25ac4f58 tags:
``` python
# Run the URL status check over the scraped items; the captured output shows
# one "Checking status <url>" line per URL visited.
# NOTE(review): test_url_statuses is defined elsewhere in the notebook, so the
# meaning of its return value (kept in cbs) cannot be confirmed from here.
cbs = test_url_statuses(items)
```
%%%% Output: stream
Checking status http://constant.all2all.org/~digitales/texts/fernandez_confNL.02
Checking status https://www.constantvzw.org/verlag/IMG/doc/Salman_Sayyid.doc
Checking status http://data.constantvzw.org/s-a-s/17_Sayyid.pdf
Checking status http://ospublish.constantvzw.org/sources/vj10/vj10-interior.pdf
Checking status https://www.constantvzw.org/verlag/IMG/rtf/Sehgal-2.rtf
Checking status https://www.constantvzw.org/verlag/IMG/rtf/Laermans.rtf
Checking status https://www.constantvzw.org/verlag/IMG/doc/Muriel_Andrin.doc
Checking status https://www.constantvzw.org/verlag/IMG/doc/Catherine_S_Ramirez.doc
Checking status http://data.constantvzw.org/s-a-s/15_villota.pdf
Checking status https://www.constantvzw.org/verlag/IMG/doc/Sarah_Bracke.doc
Checking status http://data.constantvzw.org/s-a-s/16_zummer.pdf
Checking status https://www.constantvzw.org/verlag/IMG/doc/An_Mertens.doc
Checking status http://routes-routines.constantvzw.org/wp-content/uploads/2010/ctt_routes-routines-interieur-small.pdf
Checking status http://data.constantvzw.org/s-a-s/04_hopkinson.pdf
Checking status http://data.constantvzw.org/s-a-s/14_stengers.pdf
Checking status http://data.constantvzw.org/s-a-s/03_gallego.pdf
Checking status https://www.constantvzw.org/verlag/IMG/doc/Richard_Barbrook.doc
Checking status https://www.constantvzw.org/verlag/IMG/doc/Gul_Kacmaz_Erk.doc
Checking status http://data.constantvzw.org/s-a-s/11_Perez.pdf
Checking status http://data.constantvzw.org/s-a-s/12_ramirez.pdf
Checking status https://gitorious.org/libregraphicsmag/interactivosbook/
Checking status http://www.constantvzw.com/vj6/Texts.php?id=42
Checking status http://data.constantvzw.org/s-a-s/01_aguilera.pdf
Checking status http://www.colaboratorio.es/en/games/cat/9/p/12/legal-performance/
Checking status http://constantvzw.org/verlag/IMG/pdf/db06_executing_practices_modifiying_the_universal_pierrot_roscam_abbing_snelting.pdf
Checking status http://data.constantvzw.org/s-a-s/10_navarro.pdf
Checking status https://www.constantvzw.org/verlag/IMG/doc/Ronald_Soetaert_Kris_Rutten_Ive_Verdoodt.doc
Checking status http://data.constantvzw.org/s-a-s/08_moisseeff.pdf
Checking status https://www.constantvzw.org/verlag/IMG/pdf/R_R_EN-med.pdf
Checking status https://www.constantvzw.org/verlag/IMG/doc/David_Sanchez_Rubio.doc
Checking status https://www.constantvzw.org/verlag/IMG/doc/StarwardWord_liz_williams.doc
Checking status http://data.constantvzw.org/s-a-s/06_mcleod_the_ship.pdf
Checking status https://www.constantvzw.org/verlag/IMG/doc/Cory_Doctorow.doc
Checking status http://data.constantvzw.org/s-a-s/02_barcelo.pdf
Checking status http://data.constantvzw.org/s-a-s/07_mcleod_untitled.pdf
Checking status http://data.constantvzw.org/s-a-s/09_moreno.pdf
Checking status https://www.constantvzw.org/verlag/IMG/ods/FLOSS_Fit_for_purpose_a_testers_day_in_3_acts.ods
Checking status https://www.constantvzw.org/verlag/IMG/pdf/GooDiff-FreeSoftwareAgainstLegalGrayGoo.pdf
Checking status https://www.constantvzw.org/verlag/IMG/pdf/marginalia_article__def.pdf
Checking status http://www.adashboard.org/kaleidoscope_art/kaleidoscope.html
Checking status http://aa.lgru.net/pages/Catalog/
Checking status http://www.constantvzw.com/transmedia_archive/000094.html
Checking status https://www.constantvzw.org/verlag/IMG/pdf/cqrrelations_rapport-fr-27-05-2015.pdf
Checking status http://www.activearchives.org/whoswho/datadiary.pdf
Checking status http://snelting.domainepublic.net/texts/divide_share.pdf
Checking status http://www.pianofabriek.be
Checking status http://www.constantvzw.com/copy.cult/texts/lal_en.html
Checking status http://www.adashboard.org/wp-content/uploads/2012/12/funzie_fonzie_leesmasjien_def.pdf
Checking status http://snelting.domainepublic.net/texts/generous_practices.odt
Checking status https://www.constantvzw.org/verlag/IMG/pdf/i_dont_know_en.pdf
Checking status https://www.constantvzw.org/verlag/IMG/pdf/i_dont_know_fr.pdf
Checking status http://etherdump.constantvzw.org/p/machineresearch.interviewwithetherbox.raw.html
Checking status https://www.constantvzw.org/verlag/IMG/pdf/tto_report.pdf
Checking status http://www.constantvzw.com/copy.cult/texts/lal.html
Checking status https://machineresearch.wordpress.com/
Checking status http://www.constantvzw.com/downloads/marc.pdf
Checking status http://ospublish.constantvzw.org/wp-content/uploads/berlin_pp.pdf
Checking status http://parlezvous1060.be/media/publication.pdf
Checking status http://ospublish.constantvzw.org/wp-content/uploads/corsetskinskeleton.pdf
Checking status https://www.constantvzw.org/verlag/IMG/doc/Inleiding.doc
Checking status http://www.adashboard.org/wp-content/uploads/2014/01/une_pratique_d_ecriture_au_21e_siecle.pdf
Checking status https://www.constantvzw.org/verlag/IMG/pdf/WonderTechno-Victoire-LeSoir-051111.pdf
Checking status http://fr.flossmanuals.net/
Checking status http://fr.flossmanuals.net/scribus/
Checking status http://www.towards.be/site/spip.php?article367
Checking status http://www.towards.be/site/spip.php?article366
Checking status http://www.constantvzw.com/cyberf/book/articles.php?pg=art26
Checking status http://digitales.constantvzw.org/texts/
Checking status http://ospublish.constantvzw.org/?p=85#more-85
Checking status http://www.constantvzw.com/modestwitness1/aae.html
Checking status http://www.constantvzw.com/vj6/Texts.php?id=41
Checking status http://www.stormy-weather.be/wiki/index.php/CCenContexte
Checking status http://www.stormy-weather.be/wiki/index.php/CCinContext
Checking status http://digitales.constantvzw.org/texts/
Checking status http://www.constantvzw.com/cyberf/book/articles.php?pg=art5
Checking status http://www.constantvzw.com/transmedia_archive/000101.html
Checking status http://reader.lgru.net/
Checking status http://www.constantvzw.com/copy.cult/texts/manifesto.html
Checking status https://www.constantvzw.org/verlag/Download Bittorent
Checking status http://networkcultures.org/wp-content/uploads/2008/10/vv_reader_small.pdf
Checking status http://www.constantvzw.com/copy.cult/copyrights.pdf
Checking status http://www.constantvzw.com/cyberf/book/articles.php?pg=art11
Checking status http://www.constantvzw.com/cyberf/book/articles.php?pg=art1
Checking status http://digitales.constantvzw.org/texts/
Checking status http://www.constantvzw.com/cyberf/book/articles.php?pg=art6
Checking status http://www.constantvzw.com/transmedia_archive/000042.html
Checking status http://www.constantvzw.com/e06/nl/cae02nl.html
Checking status http://snelting.domainepublic.net/texts/genereuze_praktijken_corr.doc
Checking status http://digitales.constantvzw.org/texts/
Checking status http://www.constantvzw.com/modestwitness1/itivityfr.html
Checking status http://www.constantvzw.com/modestwitness1/itivitynl.html
Checking status http://www.constantvzw.com/modestwitness1/iview.html
Checking status http://www.constantvzw.com/transmedia_archive/000047.html
Checking status http://constant.all2all.org/~digitales/texts/vogelart.doc
Checking status http://www.constantvzw.com/cyberf/book/articles.php?pg=art8
Checking status http://constant.all2all.org/~digitales/texts/botbol-baum06.doc
Checking status http://digitales.constantvzw.org/texts/
Checking status http://ospublish.constantvzw.org/wp-content/uploads/retrospective_readingc.pdf
Checking status http://www.constantvzw.com/e11/en/j303.html
Checking status http://www.constantvzw.com/e01/nl/music.pdf
Checking status http://www.constantvzw.com/e01/fr/music.pdf
Checking status http://www.constantvzw.com/cyberf/book/articles.php?pg=art3
Checking status http://www.constantvzw.com/cyberf/book/articles.php?pg=art4
Checking status http://www.stormy-weather.be/wiki/index.php/Refugees_in_a_contractual_utopia
Checking status http://www.constantvzw.com/modestwitness1/sqfr.html
Checking status http://www.constantvzw.com/modestwitness1/sqnl.html
Checking status http://digitales.constantvzw.org/texts/
Checking status http://digitales.constantvzw.org/texts/Vishmidt0606.pdf
Checking status http://www.constantvzw.com/copy.cult/texts/stolen.html
Checking status http://www.constantvzw.com/modestwitness1/soen.html
Checking status http://www.constantvzw.com/cyberf/book/articles.php?pg=art7
Checking status http://www.constantvzw.com/e06/pdf/flesh.pdf
Checking status http://www.constantvzw.com/copy.cult/texts/reg_liberty1.html
Checking status http://www.constantvzw.com/e12/fr/j04.html
Checking status http://www.constantvzw.com/cyberf/book/articles.php?pg=art17
Checking status http://www.constantvzw.com/cyberf/book/articles.php?pg=art18
Checking status http://www.constantvzw.com/cyberf/book/articles.php?pg=art21
Checking status http://constant.all2all.org/~digitales/texts/aristarkhova_confENG_02.rtf
Checking status http://www.constantvzw.com/vj6/Texts.php?id=40
Checking status http://digitales.constantvzw.org/texts/
%% Cell type:code id:533e53e6-ff69-4415-8500-2865fc31e180 tags:
``` python
# Run the wayback lookup over the scraped items; the captured output shows one
# "Checking wayback for <url>" line per URL looked up.
# NOTE(review): only a subset of the URLs from the status check appears in the
# output -- presumably those that were not reachable; confirm in check_wayback.
check_wayback(items)
```
%%%% Output: stream
Checking wayback for http://constant.all2all.org/~digitales/texts/fernandez_confNL.02
Checking wayback for http://data.constantvzw.org/s-a-s/17_Sayyid.pdf
Checking wayback for http://data.constantvzw.org/s-a-s/15_villota.pdf
Checking wayback for http://data.constantvzw.org/s-a-s/16_zummer.pdf
Checking wayback for http://data.constantvzw.org/s-a-s/04_hopkinson.pdf
Checking wayback for http://data.constantvzw.org/s-a-s/14_stengers.pdf
Checking wayback for http://data.constantvzw.org/s-a-s/03_gallego.pdf
Checking wayback for http://data.constantvzw.org/s-a-s/11_Perez.pdf
Checking wayback for http://data.constantvzw.org/s-a-s/12_ramirez.pdf
Checking wayback for http://www.constantvzw.com/vj6/Texts.php?id=42
Checking wayback for http://data.constantvzw.org/s-a-s/01_aguilera.pdf
Checking wayback for http://www.colaboratorio.es/en/games/cat/9/p/12/legal-performance/
Checking wayback for http://data.constantvzw.org/s-a-s/10_navarro.pdf
Checking wayback for http://data.constantvzw.org/s-a-s/08_moisseeff.pdf
Checking wayback for http://data.constantvzw.org/s-a-s/06_mcleod_the_ship.pdf
Checking wayback for http://data.constantvzw.org/s-a-s/02_barcelo.pdf
Checking wayback for http://data.constantvzw.org/s-a-s/07_mcleod_untitled.pdf
Checking wayback for http://data.constantvzw.org/s-a-s/09_moreno.pdf
Checking wayback for http://www.adashboard.org/kaleidoscope_art/kaleidoscope.html
Checking wayback for http://www.constantvzw.com/transmedia_archive/000094.html
Checking wayback for http://www.constantvzw.com/copy.cult/texts/lal_en.html
Checking wayback for http://www.constantvzw.com/copy.cult/texts/lal.html
Checking wayback for http://www.constantvzw.com/downloads/marc.pdf
Checking wayback for http://ospublish.constantvzw.org/wp-content/uploads/berlin_pp.pdf
Checking wayback for http://parlezvous1060.be/media/publication.pdf
Checking wayback for http://ospublish.constantvzw.org/wp-content/uploads/corsetskinskeleton.pdf
Checking wayback for http://www.towards.be/site/spip.php?article367
Checking wayback for http://www.towards.be/site/spip.php?article366
Checking wayback for http://www.constantvzw.com/cyberf/book/articles.php?pg=art26
Checking wayback for http://digitales.constantvzw.org/texts/
Checking wayback for http://www.constantvzw.com/modestwitness1/aae.html
Checking wayback for http://www.constantvzw.com/vj6/Texts.php?id=41
Checking wayback for http://www.stormy-weather.be/wiki/index.php/CCenContexte
Checking wayback for http://www.stormy-weather.be/wiki/index.php/CCinContext
Checking wayback for http://digitales.constantvzw.org/texts/
Checking wayback for http://www.constantvzw.com/cyberf/book/articles.php?pg=art5
Checking wayback for http://www.constantvzw.com/transmedia_archive/000101.html
Checking wayback for http://www.constantvzw.com/copy.cult/texts/manifesto.html
Checking wayback for https://www.constantvzw.org/verlag/Download Bittorent
Checking wayback for http://www.constantvzw.com/copy.cult/copyrights.pdf
Checking wayback for http://www.constantvzw.com/cyberf/book/articles.php?pg=art11
Checking wayback for http://www.constantvzw.com/cyberf/book/articles.php?pg=art1
Checking wayback for http://digitales.constantvzw.org/texts/
Checking wayback for http://www.constantvzw.com/cyberf/book/articles.php?pg=art6
Checking wayback for http://www.constantvzw.com/transmedia_archive/000042.html
Checking wayback for http://www.constantvzw.com/e06/nl/cae02nl.html
Checking wayback for http://digitales.constantvzw.org/texts/
Checking wayback for http://www.constantvzw.com/modestwitness1/itivityfr.html
Checking wayback for http://www.constantvzw.com/modestwitness1/itivitynl.html
Checking wayback for http://www.constantvzw.com/modestwitness1/iview.html
Checking wayback for http://www.constantvzw.com/transmedia_archive/000047.html
Checking wayback for http://constant.all2all.org/~digitales/texts/vogelart.doc
Checking wayback for http://www.constantvzw.com/cyberf/book/articles.php?pg=art8
Checking wayback for http://constant.all2all.org/~digitales/texts/botbol-baum06.doc
Checking wayback for http://digitales.constantvzw.org/texts/
Checking wayback for http://ospublish.constantvzw.org/wp-content/uploads/retrospective_readingc.pdf
Checking wayback for http://www.constantvzw.com/e11/en/j303.html
Checking wayback for http://www.constantvzw.com/e01/nl/music.pdf
Checking wayback for http://www.constantvzw.com/e01/fr/music.pdf
Checking wayback for http://www.constantvzw.com/cyberf/book/articles.php?pg=art3
Checking wayback for http://www.constantvzw.com/cyberf/book/articles.php?pg=art4
Checking wayback for http://www.stormy-weather.be/wiki/index.php/Refugees_in_a_contractual_utopia
Checking wayback for http://www.constantvzw.com/modestwitness1/sqfr.html
Checking wayback for http://www.constantvzw.com/modestwitness1/sqnl.html
Checking wayback for http://digitales.constantvzw.org/texts/
Checking wayback for http://www.constantvzw.com/copy.cult/texts/stolen.html
Checking wayback for http://www.constantvzw.com/modestwitness1/soen.html
Checking wayback for http://www.constantvzw.com/cyberf/book/articles.php?pg=art7
Checking wayback for http://www.constantvzw.com/e06/pdf/flesh.pdf
Checking wayback for http://www.constantvzw.com/copy.cult/texts/reg_liberty1.html
Checking wayback for http://www.constantvzw.com/e12/fr/j04.html
Checking wayback for http://www.constantvzw.com/cyberf/book/articles.php?pg=art17
Checking wayback for http://www.constantvzw.com/cyberf/book/articles.php?pg=art18
Checking wayback for http://www.constantvzw.com/cyberf/book/articles.php?pg=art21
Checking wayback for http://constant.all2all.org/~digitales/texts/aristarkhova_confENG_02.rtf
Checking wayback for http://www.constantvzw.com/vj6/Texts.php?id=40
Checking wayback for http://digitales.constantvzw.org/texts/
%% Cell type:code id:184e510c-25f2-44a4-abc1-f2ea6bc7c295 tags:
``` python
# Passing the whole dict (the commented-out call below) only wrote out the key
# "items"; with yaml.dump_all it seems a list is needed, so the list stored
# under 'items' is passed instead.
#save_to_yaml("verlag.yaml", items)
save_to_yaml("verlag.yaml", items['items'])
save_to_json("verlag.json", items)
```
%% Cell type:markdown id:7fa6c776-854f-4ded-9d22-74c1fb9d5108 tags:
## Check / extract text from verlag itself (is it in the article?)
## Group texts by language (currently separate items)
%% Cell type:markdown id:a1ab33e3-4d3d-422c-bf7d-018717f51799 tags:
## Adapt for solr
%% Cell type:code id:77348650-eda2-4d58-b0ca-193ba907d15f tags:
``` python
import json
def fix_author(x):
    """Turn a "Last, First" author string into "First Last".

    Surrounding whitespace is stripped. Only the first comma is treated as
    the separator; a name without any comma is returned stripped but
    otherwise unchanged.
    """
    name = x.strip()
    last, comma, first = name.partition(",")
    if not comma:
        # No comma: the name is already in display order.
        return name
    return f"{first.strip()} {last.strip()}"
# Reshape the saved items for Solr: each item's 'url' value is moved to the
# 'id' key, and author names are passed through fix_author.
with open("verlag.json") as fin:
    verlag = json.load(fin)

for item in verlag['items']:
    item['id'] = item.pop('url')
    if 'authors' in item:
        item['authors'] = list(map(fix_author, item['authors']))

# Only the list of items is written out, followed by a trailing newline.
with open("verlag.solr.json", "w") as fout:
    fout.write(json.dumps(verlag['items'], indent=2) + "\n")
```
%% Cell type:code id:25714f29-50d0-4795-85e0-34b76a7cf11b tags:
``` python
# Shell escape: copy the Solr-ready JSON into the LOCAL directory of the Solr install.
!cp verlag.solr.json /home/murtaugh/src/solr-8.11.0/LOCAL
```
%% Cell type:code id:e9b9effe-bfc6-478a-aca5-a65e99aeb53e tags:
``` python
# Shell escape: from the Solr install directory, post the JSON file with
# bin/post, targeting the collection/core named "verlag" (-c verlag).
!cd /home/murtaugh/src/solr-8.11.0/ && bin/post -c verlag LOCAL/verlag.solr.json
```
%%%% Output: stream
java -classpath /home/murtaugh/src/solr-8.11.0/dist/solr-core-8.11.0.jar -Dauto=yes -Dc=verlag -Ddata=files org.apache.solr.util.SimplePostTool LOCAL/verlag.solr.json
SimplePostTool version 5.0.0
Posting files to [base] url http://localhost:8983/solr/verlag/update...
Entering auto mode. File endings considered are xml,json,jsonl,csv,pdf,doc,docx,ppt,pptx,xls,xlsx,odt,odp,ods,ott,otp,ots,rtf,htm,html,txt,log
POSTing file verlag.solr.json (application/json) to [base]/json/docs
1 files indexed.
COMMITting Solr index changes to http://localhost:8983/solr/verlag/update...
Time spent: 0:00:00.468
%% Cell type:markdown id:bae1f117-e8c8-4d49-b423-7fe8554f5d19 tags:
### SOLR queries
%% Cell type:code id:91bdca2c-1e10-4f97-a5f1-e6fcb550e5f5 tags:
``` python
# Select endpoint of the local Solr core the verlag items were posted to
# (bin/post -c verlag). Querying a different core returns that core's
# documents, which have no authors_str / year_str fields to pivot on.
api = "http://localhost:8983/solr/verlag/select"
```
%% Cell type:code id:95d4455d-c9cc-4caf-9960-948d14cbacc5 tags:
``` python
from urllib.request import urlopen
from urllib.parse import urlencode
import json
```
%% Cell type:code id:8feda316-a121-4166-a16e-298d55b4fe80 tags:
``` python
# Facet-only query: match every document, return no rows, and pivot on
# authors_str then year_str.
p = {
    'q': "*:*",
    'rows': 0,
    'facet': 'true',
    'facet.pivot': 'authors_str,year_str',
}
# p['facet.mincount'] = 2 # doesn't seem to affect this query
```
%% Cell type:code id:e8ae6f44-d1c6-4e3e-b9d7-93677208d686 tags:
``` python
# Build the request URL from the endpoint and the query parameters, fetch it,
# and pretty-print the decoded JSON response.
url = api + "?" + urlencode(p)
# The response is used as a context manager so the HTTP connection is closed
# as soon as the body has been parsed.
with urlopen(url) as response:
    d = json.load(response)
print(json.dumps(d, indent=2))
```
%%%% Output: stream
{
"responseHeader": {
"zkConnected": true,
"status": 0,
"QTime": 49,
"params": {
"q": "*:*",
"facet.pivot": "authors_str,year_str",
"rows": "0",
"facet": "true"
}
},
"response": {
"numFound": 1100,
"start": 0,
"maxScore": 1.0,
"numFoundExact": true,
"docs": []
},
"facet_counts": {
"facet_queries": {},
"facet_fields": {},
"facet_ranges": {},
"facet_intervals": {},
"facet_heatmaps": {},
"facet_pivot": {
"authors_str,year_str": []
}
}
}
%% Cell type:markdown id:87365f31-e58e-413c-adbf-26c0b50e7423 tags:
## Read HTML / Adapt for Calibre
Example of a book that is only described (not contained) in the verlag page:
* https://www.constantvzw.org/verlag/spip.php?page=article&id_article=151&mot_filtre=5&id_lang=0
It has 2 paragraphs (3 including the blank final paragraph that seems to be in most of the articles).
But many articles are complete. Convert all to epub for calibre!
%% Cell type:code id:2d895af1-cb32-494b-8a20-d2e259498432 tags:
``` python
import yaml
from urllib.request import urlopen
import html5lib
import subprocess
from xml.etree import ElementTree as ET
```
%% Cell type:code id:91a4e959-0984-4c41-96ee-07adf249036f tags:
``` python
def innerHTML(elt):
    """Return the inner HTML of an ElementTree element as a string.

    The result is the element's own leading text (if any) followed by the
    HTML serialization of each child element; the element's own tag is not
    included. ET.tostring also emits each child's tail text, so text that
    sits between and after children is preserved.
    """
    # elt.text is None when the element has no leading text.
    leading = elt.text if elt.text is not None else ""
    children = "".join(
        ET.tostring(child, method="html", encoding="unicode") for child in elt
    )
    return leading + children
```
%% Cell type:code id:5fceefb4-f5c2-4496-84f8-7c98f570157e tags:
``` python
from hashlib import md5
import os
```
%% Cell type:code id:58bb1c0c-0813-4398-8076-2f986095c8ee tags:
``` python
def pandoc_html_to_markdown(text):
    """Convert an HTML string to Markdown by piping it through pandoc.

    Args:
        text: HTML source as a str; it is sent to pandoc's stdin as UTF-8.

    Returns:
        pandoc's stdout decoded as UTF-8.

    Raises:
        FileNotFoundError: if the pandoc executable cannot be found.
        subprocess.CalledProcessError: if pandoc exits with a non-zero status.
    """
    # An argument list (no shell) avoids shell parsing altogether, and
    # check=True surfaces a failed conversion instead of silently returning
    # an empty string.
    r = subprocess.run(
        ["pandoc", "--from", "html", "--to", "markdown"],
        input=text.encode("utf-8"),
        capture_output=True,
        check=True,
    )
    return r.stdout.decode("utf-8")
```
%% Cell type:code id:24b4f077-e307-43eb-a1a1-f7d2aa3aae9a tags:
``` python
# Maps a language's name in that language to its English name.
LANGS = {
    'Español': "Spanish",
    'Nederlands': "Dutch",
    'Français': "French",
    'English': "English",
}
```
%% Cell type:code id:becfcd41-fed8-45e8-b3f6-0142265c03cb tags:
``` python
# PROJS = {}
# PROJS['Cyberfeminist working days'] =
```
%% Cell type:code id:23a595f4-609b-421e-9fac-2c2897ffbdff tags:
``` python
# with open("verlag.yaml") as fin:
# for obj in yaml.full_load_all(fin):
```
%% Cell type:code id:8e680846-ae05-4974-a5a9-7959e62ce0a2 tags:
``` python
def render_epub_and_pdf (items):
for obj in items:
# print (obj)
print (obj.get("title"))
url = obj['url']
f = urlopen(url)
t = html5lib.parse(f.read(), namespaceHTMLElements=False)
src = ""
docs_path = "docs"
os.makedirs(docs_path, exist_ok=True)
authors = t.find(".//h4")
# if authors is not None:
# src += ET.tostring(authors, method="html", encoding="unicode")
title = t.find(".//h2")
titlestr = ""
if title is not None:
# src += ET.tostring(title, method="html", encoding="unicode")
titlestr = innerHTML(title)
text = t.find(".//div[@class='texte']")
if text is not None:
src += ET.tostring(text, method="html", encoding="unicode")
notes = t.find(".//div[@class='notes']")
if notes is not None:
src += ET.tostring(notes, method="html", encoding="unicode")
urlhash = md5(obj['url'].encode("utf-8")).hexdigest()
obj['md5'] = urlhash
mdpath = os.path.join(docs_path, f"{urlhash}.md")
with open(mdpath, "w") as fout:
mdyaml = {}