Commit 8749c227 authored by Michael Murtaugh's avatar Michael Murtaugh
Browse files

big cleanup

parent a0542085
#!/usr/bin/env python
# coding: utf-8
import json, os, datetime
from urllib.parse import quote as urlquote
import subprocess, re, json
......@@ -12,48 +10,10 @@ from indexalist.timecode import timecode_fromsecs
from indexalist.ffmpeginfo import get_info as ffmpeg_get_info
from indexalist.pdfinfo import get_info as pdf_get_info
from indexalist.imageinfo import get_info as image_get_info
# from etreeutils import innerHTML
from xml.etree import ElementTree as ET
from jinja2 import Markup
# from gitignore_parser import parse_gitignore
# import gitignore_parser
# def parse_gitignore_string (str, base_dir):
# """ code based on parse_gitignore, adapted for a string """
# rules = []
# counter = 0
# for line in str.splitlines():
# counter += 1
# line = line.rstrip('\n')
# rule = gitignore_parser.rule_from_pattern(line, base_path=gitignore_parser.Path(base_dir).resolve(),
# source=("__str__", counter))
# if rule:
# rules.append(rule)
# print (f"rule {rule}")
# if not any(r.negation for r in rules):
# return lambda file_path: any(r.match(file_path) for r in rules)
# else:
# # We have negation rules. We can't use a simple "any" to evaluate them.
# # Later rules override earlier rules.
# return lambda file_path: gitignore_parser.handle_negation(file_path, rules)
# def escape(arg):
# """escape shell special characters"""
# print(f"ESCAPE {arg}")
# slash = '\\'
# special = '"$*'
# arg = arg.replace(slash, slash+slash)
# for c in special:
# arg = arg.replace(c, slash+c)
# print("ESCAPE RESULT: %s" % arg)
# return '"' + arg + '"'
def innerHTML (elt):
if elt.text != None:
ret = elt.text
......@@ -61,24 +21,6 @@ def innerHTML (elt):
ret = u""
return ret + u"".join([ET.tostring(x, method="html", encoding="unicode") for x in elt])
# def ls (path):
# # TODO: honor .indexignore's
# if path == "":
# path = "."
# ret = os.listdir(path)
# ret = [(name.lower(), name) for name in ret]
# ret.sort()
# ret = [x for _, x in ret if not (x.startswith(".") or x == "index.html")]
# return ret
# def lsdeps (path):
# def xf (p):
# if os.path.isdir(os.path.join(path, p)):
# return os.path.join(p, ".index", ".index.json")
# return p
# return list(map(xf, ls(path)))
def make_folder_meta (target, source, env):
"""
Main Builder Action, constructs .index/.index.json from:
......@@ -86,7 +28,6 @@ def make_folder_meta (target, source, env):
* file metadata, e.g. [ *.index/FILE/metadata.json*, ... ]
* contained folders via their [ *FOLDER/.index/.index.json*, ... ]
"""
# siteroot = env.Dictionary()['siteroot']
tpath = target[0].path
path = os.path.dirname(os.path.dirname(tpath))
......@@ -265,7 +206,6 @@ def make_image_poster (target, source, env):
subprocess.run(["convert", "wizard:", "-auto-orient", "-resize", "640x640", "-flatten", target[0].path])
ImagePoster = Builder(action=make_image_poster)
# ImageThumb = Builder(action="""
# mkdir -p $TARGET.dir && \
# convert $SOURCE[0] -auto-orient -resize 200x200 -flatten $TARGET || convert wizard: -resize 640x640 $TARGET
......@@ -277,7 +217,6 @@ def make_image_thumb (target, source, env):
subprocess.run(["convert", "wizard:", "-auto-orient", "-resize", "200x200", "-flatten", target[0].path])
ImageThumb = Builder(action=make_image_thumb)
def make_image_meta (target, source, env):
d = image_get_info(source[0].path)
d['id'] = urlquote(os.path.basename(source[0].path), errors="surrogatepass")
......@@ -312,17 +251,18 @@ def make_pdf_meta (target, source, env):
PDFMeta = Builder(action=make_pdf_meta)
def add_attribute_to_links (src, attrname, attrvalue):
    """
    Parse the HTML fragment *src*, set the attribute *attrname* to
    *attrvalue* on every <a> element found below the fragment root, and
    return the fragment serialized again via innerHTML().
    """
    fragment = html5lib.parseFragment(
        src,
        treebuilder="etree",
        namespaceHTMLElements=False,
    )
    for link in fragment.findall(".//a"):
        link.set(attrname, attrvalue)
    return innerHTML(fragment)
def template_action (target, source, env):
tpath, tname = os.path.split(env.Dictionary().get("SCONS_TEMPLATE"))
tpath, tname = os.path.split(env.Dictionary().get("INDEX_TEMPLATE", "templates/index.html"))
rootpath = os.path.abspath(env.Dictionary().get("SCONS_ROOT"))
index_scripts = env.Dictionary().get("INDEX_SCRIPTS", "").split(":")
index_stylesheets = env.Dictionary().get("INDEX_STYLESHEETS", "").split(":")
# print (f"template_action, rootpath: {rootpath}")
jenv = jinja2.Environment(loader=jinja2.FileSystemLoader(tpath))
......@@ -365,13 +305,14 @@ def template_action (target, source, env):
allkeys.add(key)
data['allkeys'] = allkeys
data['breadcrumbs'] = bc
data['scripts'] = index_scripts
data['stylesheets'] = index_stylesheets
with open(target[0].path, "wb") as fout:
fout.write(template.render(**data).encode("utf-8", errors="surrogateescape"))
# fout.write(template.render(**data).encode("utf-8", errors="replace"))
# Template = Builder(action=template_action)
Template = Builder(action=Action(template_action, varlist=("SCONS_TEMPLATE", "SCONS_ROOT")))
Template = Builder(action=Action(template_action, varlist=("INDEX_TEMPLATE", "INDEX_SCRIPTS", "INDEX_STYLESHEETS", "SCONS_ROOT")))
### Perform the build
builders = {
......@@ -403,17 +344,6 @@ env = Environment(BUILDERS=builders)
# return os.path.join(p, ".index", ".index.json")
# return p
# def ignore_p (path, ignores):
# path = os.path.abspath(path)
# for ignore in ignores:
# if ignore(path):
# print (f"IGNORING {path}", file=sys.stderr)
# return True
# return False
# DEFAULT_GITIGNORE_RULES = """
# /.*
# """.strip()
def ignore_p (path, ignores):
filename = os.path.basename(path)
if filename.startswith("."):
......@@ -536,9 +466,11 @@ for folder, deps in depwalk("."):
depsmeta.append(file_meta)
env.FolderMeta(target=folder_meta_path, source=deps+depsmeta)
template_path = os.environ.get("SCONS_TEMPLATE") or "templates/index.html"
index_stylesheets =
env.Template(target=os.path.join(folder, "index.html"), \
source=[folder_meta_path, File(template_path)], \
SCONS_TEMPLATE=template_path, \
INDEX_TEMPLATE=os.environ.get("INDEX_TEMPLATE"), \
INDEX_SCRIPTS=os.environ.get("INDEX_SCRIPTS"), \
INDEX_STYLESHEETS=os.environ.get("INDEX_STYLESHEETS"), \
SCONS_ROOT=os.environ.get("SCONS_ROOT") or rootdir.abspath
)
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index of {{filename}}</title>
</head>
<body>
<h1>Index of {{filename}}</h1>
<table>
<tr><th valign="top"><img src="/icons/blank.gif" alt="[ICO]"></th><th><a href="?C=N;O=D">Name</a></th><th><a href="?C=M;O=A">Last modified</a></th><th><a href="?C=S;O=A">Size</a></th><th><a href="?C=D;O=A">Description</a></th></tr>
<tr><th colspan="5"><hr></th></tr>
<tr>
<td valign="top"><img src="/icons/back.gif" alt="[PARENTDIR]"></td>
<td><a href="..">Parent Directory</a></td>
<td>&nbsp;</td>
<td align="right">&ndash;</td>
<td>&nbsp;</td>
</tr>
{% for d in dirs %}
<tr>
<td valign="top"><img src="/icons/folder.gif" alt="[FOLDER]"></td>
<td><a href="{{d.filename|urlencode}}/">{{d.filename}}/</a></td>
<td align="right">&nbsp;</td>
<td align="right">{{d.size|filesizeformat}}</td>
<td>{{d.files_count}} files{% if d.dirs_count > 1 %}, {{d.dirs_count-1}} subfolders{% endif %}</td>
</tr>
{% endfor %}
{% for f in files %}
<tr>
<td valign="top"><img src="/icons/doc.gif" alt="[FILE]"></td>
<td><a href="{{f.filename|urlencode}}">{{f.filename}}</a></td>
<td align="right">{{f.mtime|strftime}}</td>
<td align="right">{{f.size|filesizeformat}}</td>
<td>&nbsp;</td>
</tr>
{% endfor %}
<tr><th colspan="5"><hr></th></tr>
<tr class="summary">
<td colspan="1">&nbsp;</td>
<td colspan="2">{{files_count}} files, {{dirs_count-1}} folders</td>
<td colspan="1" align="right">{{size|filesizeformat}}</td>
<td colspan="1">&nbsp;</td>
</tr>
<tr><th colspan="5"><hr></th></tr>
</table>
<address>Last updated {{now}}</address>
</body></html>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index of {{relpath}}</title>
</head>
<body>
<h1>Index of {{relpath}}</h1>
<table>
<tr><th valign="top"><img src="/icons/blank.gif" alt="[ICO]"></th><th><a href="?C=N;O=D">Name</a></th><th><a href="?C=M;O=A">Last modified</a></th><th><a href="?C=S;O=A">Size</a></th><th>Duration</th><th><a href="?C=D;O=A">Description</a></th></tr>
<tr><th colspan="5"><hr></th></tr>
<tr><td valign="top"><img src="/icons/back.gif" alt="[PARENTDIR]"></td><td><a href="..">Parent Directory</a></td><td>&nbsp;</td><td align="right">&ndash;</td><td>&nbsp;</td></tr>
{% for g in groupedfiles %}<tr><td valign="top">{% if g.files[0].is_file %}<img src="/icons/doc.gif" alt="[FILE]">{% else %}<img src="/icons/folder.gif" alt="[DIR]">{%endif%}{% if g.poster %}<img src="{{g.poster.filename}}"/>{%endif%}</td><td>{% for f2 in g.files %}<a href="{{f2.filename}}">{{f2.filename}}{% if f2.is_dir %}/{% endif %}</a>{% endfor %}</td><td align="right">{{g.files[0].mtime_iso|strftime}} </td><td align="right">{% if g.files[0].is_file %}{{g.files[0].bytes|humanize_bytes}}{% else %}&ndash;{%endif%}</td><td>{% if g.meta and g.meta.duration %}{{g.meta.duration|timecode}}{%endif%}</td><td>&nbsp;</td></tr>
{% endfor %}
<tr><th colspan="5"><hr></th></tr>
</table>
<address>Last updated {{now|strftime}}</address>
</body></html>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index of {{relpath}}</title>
</head>
<body>
<h1>Index of {{relpath}}</h1>
<table>
<tr><th valign="top"><img src="/icons/blank.gif" alt="[ICO]"></th><th><a href="?C=N;O=D">Name</a></th><th><a href="?C=M;O=A">Last modified</a></th><th><a href="?C=S;O=A">Size</a></th><th><a href="?C=D;O=A">Description</a></th></tr>
<tr><th colspan="5"><hr></th></tr>
<tr><td valign="top"><img src="/icons/back.gif" alt="[PARENTDIR]"></td><td><a href="..">Parent Directory</a></td><td>&nbsp;</td><td align="right">&ndash;</td><td>&nbsp;</td></tr>
{% for g in groupedfiles %}<tr><td valign="top">{% if g.files[0].is_file %}<img src="/icons/doc.gif" alt="[FILE]">{% else %}<img src="/icons/folder.gif" alt="[DIR]">{%endif%}{% if g.poster %}<img src="{{g.poster.filename}}"/>{%endif%}</td><td><a href="{{g.files[0].filename}}">{{g.files[0].filename}}{% if g.files[0].is_dir %}/{% endif %}</a></td><td align="right">{{g.files[0].mtime_iso|strftime}} </td><td align="right">{% if g.files[0].is_file %}{{g.files[0].bytes|humanize_bytes}}{% else %}&ndash;{%endif%}</td><td>&nbsp;</td></tr>
{% endfor %}
<tr><th colspan="5"><hr></th></tr>
</table>
<address>Last updated {{now|strftime}}</address>
</body></html>
......@@ -4,8 +4,12 @@
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Index of {{filename|default("/", True)}}</title>
<link rel="stylesheet" type="text/css" href="/lib/index.css">
<script src="/lib/index.js"></script>
{% for stylesheet in stylesheets -%}
<link rel="stylesheet" type="text/css" href="{{stylesheet}}">
{%- endfor %}
{# Emit one <script> tag per entry of the `scripts` list passed to the template. -#}
{% for script in scripts -%}
<script src="{{script}}"></script>
{%- endfor %}
</head>
<body vocab="http://activearchives.org/terms/" typeof="mediafolder">
<h1 class="breadcrumbs">{% for b in breadcrumbs %}
......
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Index of {{filename|default("/", True)}}</title>
</head>
<body>
<p class="breadcrumbs">{% for label, link in breadcrumbs %}
<a href="{{link}}">{{label}}</a>{% if not loop.last %} | {%endif%}{%endfor%}
</p>
<h1>{{filename|default("/", True)}}</h1>
<table>
<tr>
<th valign="top"><img src="/icons/blank.gif" alt="[ICO]"></th>
<th>Name</th>
<th>Last modified</th>
<th>Size</th>
<th>Description</th>
</tr>
<tr><th colspan="5"><hr></th></tr>
{% for d in dirs %}
<tr>
<td valign="top"><img src="/icons/folder.gif" alt="[FOLDER]"></td>
<td><a href="{{d.filename|urlencode}}/index.html">{{d.filename}}/</a></td>
<td align="right">{% if d.mtime %}{{d.mtime|strftime}}{%else%}&mdash;{%endif%}</td>
<td align="right" content="{{d.size}}">{{d.size|filesizeformat}}</td>
<td>{{d.files_count}} files{% if d.dirs_count > 1 %}, {{d.dirs_count-1}} subfolders{% endif %}</td>
</tr>
{% endfor %}
{% for f in files %}
<tr>
<td valign="top"><img src="/icons/doc.gif" alt="[FILE]"></td>
<td><a data-mime="{{f.mime}}" href="{{f.filename|urlencode}}">{{f.filename}}</a></td>
<td align="right">{{f.mtime|strftime}}</td>
<td align="right">{{f.size|filesizeformat}}</td>
<td>&nbsp;</td>
</tr>
{% endfor %}
<tr><th colspan="5"><hr></th></tr>
<tr class="summary">
<td colspan="1">&nbsp;</td>
<td colspan="1">{{files_count}} files{% if dirs_count > 1 %}, {{dirs_count-1}} folders{%endif%}</td>
<td colspan="1" align="right">{% if mtime%}{{mtime|strftime}}{%else %}&nbsp;{%endif%}</td>
<td colspan="1" align="right">{{size|filesizeformat}}</td>
<td colspan="1">&nbsp;</td>
</tr>
<tr><th colspan="5"><hr></th></tr>
</table>
<script src="media.js"></script>
</body>
</html>
from __future__ import print_function
import os, stat, datetime, sys, hashlib
"""
The "nodes" are fileinfo bubbles that eventually prevent unnecessary (deep) access to the underlying files.
"""
def fileinfo (path):
    """
    Build a fresh node dict describing *path* from one os.lstat() call.

    Keys set on the returned dict:
      name   last component of *path*
      path   *path* as given
      mtime  modification time, ISO 8601 string (datetime.fromtimestamp)
      ctime  st_ctime, ISO 8601 string (datetime.fromtimestamp)
      size   st_size
      mode   st_mode as six zero-padded octal digits ("100644", "040755")
      type   "file" when os.path.isfile(path) is true, else "directory"

    Raises OSError when *path* cannot be lstat'ed.
    """
    _, name = os.path.split(path)
    # Named "st" (not "stat") so the imported stat module is not shadowed.
    st = os.lstat(path)
    ret = {"name": name}
    ret['path'] = path
    ret['mtime'] = datetime.datetime.fromtimestamp(st.st_mtime).isoformat()
    ret['ctime'] = datetime.datetime.fromtimestamp(st.st_ctime).isoformat()
    ret['size'] = st.st_size
    # Format explicitly: slicing oct() output keeps part of the "0o" prefix
    # on Python 3 for directory modes (e.g. "o40755" instead of "040755").
    ret['mode'] = "%06o" % st.st_mode
    if os.path.isfile(path):
        ret['type'] = 'file'
    else:
        ret['type'] = 'directory'
    return ret
def get_hash (n):
    """
    Return the node's 'sha1' value, computing it with githash() from
    n['path'] and storing it on the node first when it is not yet present.
    """
    if 'sha1' not in n:
        n['sha1'] = githash(n['path'])
    return n['sha1']
def identical (x, y):
    """
    True only when both nodes carry 'mtime' and 'sha1' and the two nodes
    agree on each of those values; a missing key on either side means False.
    """
    for key in ('mtime', 'sha1'):
        if key not in x or key not in y:
            return False
        if not (x[key] == y[key]):
            return False
    return True
def children (n):
    """
    Return the child nodes of *n*.

    An existing n['children'] list is returned as is. Otherwise, when
    n['path'] is a directory, one fileinfo() node is created per entry
    (sorted by name), stored on n['children'] and returned. For anything
    that is not a directory an empty list is returned and nothing is stored.
    """
    if 'children' in n:
        return n['children']
    if not os.path.isdir(n['path']):
        return []
    kids = []
    for entry in sorted(os.listdir(n['path'])):
        kids.append(fileinfo(os.path.join(n['path'], entry)))
    n['children'] = kids
    return kids
def nodify (path):
    """ Create a node for *path*; currently just delegates to fileinfo(). """
    node = fileinfo(path)
    return node
# actually this is like "updatewalk"
# if a file... check the live version against the node values (last check)
# and UPDATE
def docwalk (node, entering=False):
    """
    Bring *node* (a dict-like with at least a 'path' key) up to date with
    the filesystem, recursing into directories, and report whether anything
    changed. Progress messages go to stderr.

    File: the lstat mtime is compared with node['mtime']; when it differs it
    is stored and the git-style sha1 is recomputed with githash(). When the
    sha1 differs too it is stored, "update" (or "update/enter" when
    *entering* is true) is logged and True is returned. Otherwise the
    function falls off the end and implicitly returns None.

    Anything else is treated as a directory: existing children (matched by
    their 'path') are re-walked, unknown entries become new nodes that are
    walked with entering=True and appended ("enter" is logged), and children
    whose path no longer shows up in the listing are removed ("exit" is
    logged). The accumulated `changed` flag is returned.

    NOTE(review): the nesting below was reconstructed from a flattened
    listing; in particular `return True` is assumed to belong to the
    "sha1 changed" branch -- confirm against the original file.
    """
    # the root node can't enter
    path = node['path']
    if os.path.isfile(path):
        # FILE
        # start with mtime
        stat = os.lstat(path)
        mtime = stat.st_mtime
        if 'mtime' not in node or mtime != node['mtime']:
            node['mtime'] = mtime # UPDATE mtime ... but has the contents changed?
            # COMPUTE SHA1
            sha1 = githash(path)
            if 'sha1' not in node or sha1 != node['sha1']:
                node['sha1'] = sha1
                if entering:
                    print ("update/enter", path, file=sys.stderr)
                else:
                    print ("update", path, file=sys.stderr)
                # NB: Updates happen both on existing nodes that are truly updated when their file changed
                # but also on NEW NODES that are ABOUT TO ENTER (enter is called below)
                # maybe could/should set an "entering" flag...
                # here should be any extended node stuff (like additional sniffing / update)
                return True
    else:
        # DIRECTORY
        # check its contents
        changed = False
        # Index the previously known children by path; entries still left in
        # this dict after the listing loop are the ones that disappeared.
        oldchildrenbykey = {}
        if 'children' in node:
            oldchildrenbykey = {x['path']: x for x in node['children']}
        else:
            node['children'] = []
        for child in [os.path.join(path, x) for x in sorted(os.listdir(path))]:
            if child in oldchildrenbykey:
                oldchild = oldchildrenbykey[child]
                del oldchildrenbykey[child]
                if docwalk(oldchild):
                    changed = True
            else:
                # creating a new node is done in this idiomatic way
                # to allow node to be a special "dict-like" object
                newnode = type(node)(path=child)
                # the return value is ignored: a new child always counts as a change
                docwalk(newnode, entering=True)
                # placed here, this will follow the update above
                print ("enter", newnode['path'], file=sys.stderr)
                node['children'].append(newnode)
                changed = True
        for opath, oldnode in oldchildrenbykey.items():
            node['children'].remove(oldnode)
            print ("exit", opath, file=sys.stderr)
            changed = True
        return changed
# def diffwalk (node, oldnode, key, changed, children, update, enter, exit):
# """ The walk is depth first such that underlying nodes should get their enter/update methods called before parents
# In this way, underlying nodes may be known to be complete when processing in the parent's enter/update.
# Always starts with a fresh node, part of old are copied in as possible (based on is_identical).
# Calling children also always produces fresh nodes...
# join criteria based on the key function (path for files)
# SEPARATE "LIVE" stuff (node/data) from the "DOCUMENTS" (oldnode) ... focus on modifying the document always
# .... ??? only accept "oldnode" and compare it to a "live" version updating the document as needed ... with callbacks!
# """
# # node = nodify(oldnode['path'])
# node_changed = False
# oldchildrenbykey = {}
# if oldnode:
# oldchildrenbykey = dict([(key(x), x) for x in children(oldnode)])
# if identical(oldnode, node):
# # <!?> SHOULD I copy over old to new and return the new?
# # file is unchanged, so no events are triggered.
# return oldnode
# # node_changed can also be None (not sure)
# # past this point, if its a file, it HAS changed
# # can call update immediately
# # if its a directory, depends on contents
# # CAll children of (live) node --- creating new child nodes
# for cn in children(node):
# cnkey = key(cn)
# if cnkey in oldchildrenbykey:
# oldchild = oldchildrenbykey[cnkey]
# del oldchildrenbykey[cnkey]
# # recurse into children
# newchild = diffwalk(cn, oldchild, key, changed, children, update, enter, exit)
# if newchild is not oldchild:
# node_changed = True
# else:
# # RECURSE
# newchild = diffwalk(cn, None, key, changed, children, update, enter, exit)
# node_changed=True
# for oldchild in oldchildrenbykey.values():
# exit(oldchild)
# # changes['exit'].append((os.path.join(path, oldchild['name']), oldchild))
# node_changed = True
# if not node_changed and oldnode:
# return oldnode
# else:
# if oldnode:
# update((node, oldnode))
# return node
# else:
# enter(node)
# return node
# quickly checking if a folder (contents) has changed.
# for git style --- need to use sha hashes, but that is itself expensive to calculate
# so if the "index" (node) exists... check there... otherwise calculate.
def githash(path, block_size=2**20):
    """
    Should produce the same SHA1 hash git uses for a particular file.

    The digest covers the header "blob <size>\\0" followed by the file's
    bytes, read in chunks of *block_size* bytes.

    Returns the hex digest string. Raises OSError when *path* cannot be
    stat'ed or opened.
    """
    # print ("computing sha1", path, file=sys.stderr)
    s = hashlib.sha1()
    filesize = os.path.getsize(path)
    # hashlib only accepts bytes on Python 3, so the header is encoded explicitly.
    s.update(("blob %u\0" % filesize).encode("ascii"))
    # "with" guarantees the handle is closed, also when a read fails.
    with open(path, "rb") as f:
        for data in iter(lambda: f.read(block_size), b""):
            s.update(data)
    return s.hexdigest()
def gittreehash(text, block_size=2**20):
    """
    Should produce the same SHA1 hash git uses for a particular tree
    (not sure about exact format though).

    The digest covers the header "tree <size>\\0" followed by *text*.
    *text* may be bytes or a text string; text strings are encoded as UTF-8
    first and <size> is the length of the resulting bytes.
    *block_size* is accepted but not used.

    Returns the hex digest string.
    """
    s = hashlib.sha1()
    # hashlib only accepts bytes on Python 3; normalize before measuring so
    # the size in the header counts bytes rather than characters.
    if not isinstance(text, bytes):
        text = text.encode("utf-8")
    size = len(text)
    s.update(("tree %u\0" % size).encode("ascii"))
    s.update(text)
    return s.hexdigest()
def treetext (dirnode):
    """
    Render the children of *dirnode* as text, one line per child:
    "<mode> <tree|blob> <sha1> <name>".

    Assumes nodes are complete (entered/updated) with sha1's set, so the
    filesystem is not touched here.
    """
    lines = []
    for child in dirnode['children']:
        mode = child['mode']
        kind = "tree" if child['type'] == 'directory' else "blob"
        lines.append(" ".join([mode, kind, child['sha1'], child['name']]) + "\n")
    return "".join(lines)
if __name__ == "__main__":
    # Command line entry point: update a node document against the filesystem.
    #   --doc FILE   load the document from FILE (JSON) and write it back afterwards
    #   --path PATH  start from a fresh document for PATH and print it to stdout
    import argparse, json
    ap = argparse.ArgumentParser("")
    ap.add_argument("--path", default=None)
    ap.add_argument("--doc", default=None)
    args = ap.parse_args()
    # Without either option docwalk would be handed a None path and fail
    # deep inside os.path; report a usage error instead.
    if not args.doc and args.path is None:
        ap.error("one of --path or --doc is required")
    doc = None
    if args.doc:
        with open(args.doc) as f:
            doc = json.load(f)
    else:
        doc = {'path': args.path}
    change = docwalk(doc)
    print ("CHANGE:", change, file=sys.stderr)
    if args.doc:
        with open(args.doc, "w") as f:
            json.dump(doc, f, indent=2)
    else:
        print (json.dumps(doc, indent=2))
#!/usr/bin/env python3
import sys, subprocess, os
from ftfy import fix_text
def main (args):
    """
    Walk args.path (hidden directories are pruned) and look for file paths
    that cannot be encoded as UTF-8. Each such path is passed through
    fix_text(); with args.rename set the file is renamed to the result and
    "RENAMED ..." is printed, otherwise "BAD ..." is printed.
    """
    for base, dirs, files in os.walk(args.path):
        # in-place slice assignment so os.walk skips dot-directories
        dirs[:] = [name for name in dirs if not name.startswith(".")]
        for filename in files:
            fullpath = os.path.join(base, filename)
            try:
                fullpath.encode("utf-8")
            except UnicodeEncodeError:
                repaired = fix_text(fullpath)
                if not args.rename:
                    print (f"BAD {repaired}")
                else:
                    os.rename(fullpath, repaired)
                    print (f"RENAMED {repaired}")
def add_subparser (subparsers):
    """
    Register the 'fixfilenames' subcommand on *subparsers*: an optional
    --rename flag, an optional positional path (default "."), and main()
    as the handler stored under 'func'.
    """
    parser = subparsers.add_parser(
        'fixfilenames',
        help='Detect & optionally rename problematic utf-8 filenames (remove surrogates)',
    )
    parser.add_argument('--rename', action="store_true", default=False)
    parser.add_argument('path', nargs="?", default=".")
    parser.set_defaults(func=main)
if __name__ == "__main__":
    # Standalone use: behave as if the 'fixfilenames' subcommand had been given.
    from argparse import ArgumentParser
    # Only the ArgumentParser name is imported here, so use it directly;
    # "argparse.ArgumentParser" would raise NameError.
    p = ArgumentParser("")
    subparsers = p.add_subparsers(help="subcommands")
    add_subparser(subparsers)
    main(p.parse_args(args=(["fixfilenames"] + sys.argv[1:])))
#!/usr/bin/env python3
import argparse, sys, json
def main(args):
data = json.load(args.input)
if args.keyvalue is not None:
for key, value in args.keyvalue:
data[key] = value
print (json.dumps(