Commit ae7808a3 authored by Michael Murtaugh

new self-contained SConstruct + template to use with just scons

parent a4b82914
#!/usr/bin/env python3
# coding: utf-8
# standard library
import datetime
import json
import math
import os
import re
import subprocess
import sys
from urllib.parse import quote as urlquote
from xml.etree import ElementTree as ET
# external libraries
import jinja2, html5lib, isodate, markdown
from jinja2 import Markup
from exif import Image as ExifImage
"""
## important env variables
INDEX_TEMPLATE (default templates/index.html)
INDEX_SCRIPTS (: delim list of scripts)
INDEX_STYLESHEETS (: delimited list of stylesheets)
SCONS_ROOT root path
"""
def image_get_info(path, data=None):
    pat = re.compile(r"(?P<format>\w+) (?P<width>\d+)x(?P<height>\d+)")

    def extract(text):
        m = pat.search(text)
        if m:
            return m.groupdict()

    # use ImageMagick's identify to read format + dimensions
    p = subprocess.run(["identify", path], capture_output=True)
    o = p.stdout.decode("utf-8", errors="replace")
    if data is None:
        data = {}
    data['mediatype'] = "image"
    d = extract(o)
    if d:
        data['image_format'] = d['format']
        data['width'] = int(d['width'])
        data['height'] = int(d['height'])
    # print(f"attempting to ExifImage {path}")
    try:
        with open(path, "rb") as f:
            im = ExifImage(f)
            if im and im.has_exif:
                if hasattr(im, "datetime_original"):
                    data['datetime_original'] = im.datetime_original
                if hasattr(im, "model"):
                    data['model'] = im.model
                if hasattr(im, "software"):
                    data['software'] = im.software
                if hasattr(im, "lens_model"):
                    data['lens_model'] = im.lens_model
                if hasattr(im, "flash") and hasattr(im.flash, "flash_fired"):
                    data['flash'] = im.flash.flash_fired
                if hasattr(im, "f_number"):
                    data['f_number'] = im.f_number
                if hasattr(im, "focal_length"):
                    data['focal_length'] = im.focal_length
                if hasattr(im, "orientation") and hasattr(im.orientation, 'name'):
                    data['orientation'] = im.orientation.name
                if hasattr(im, "exposure_time"):
                    data['exposure_time'] = im.exposure_time
    except Exception:
        # EXIF reading is best-effort; skip files with missing/unreadable EXIF
        pass
    return data
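# Sketch of a typical return value (values illustrative; EXIF keys appear only when present):
# {"mediatype": "image", "image_format": "JPEG", "width": 4000, "height": 3000,
#  "datetime_original": "2020:06:01 12:00:00", "model": "...", "orientation": "TOP_LEFT"}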
##############################
def timecode_fromsecs(rawsecs, fract=True, alwaysfract=False, fractdelim=',', alwayshours=False):
    # timecode_pat = re.compile(r"(\d+):(\d+):(\d+)(?:[.,](\d+))?")
    timecode_pat = re.compile(r"(?:(\d+):)?(\d+):(\d+)(?:[.,](\d+))?")  # nb: unused in this function
    # returns a string in HH:MM:SS[.xxx] notation
    # if fract is True, uses .xxx if either necessary (non-zero)
    # OR alwaysfract is True
    hours = math.floor(rawsecs / 3600)
    rawsecs -= hours * 3600
    mins = math.floor(rawsecs / 60)
    rawsecs -= mins * 60
    if fract:
        secs = math.floor(rawsecs)
        rawsecs -= secs
        if rawsecs > 0 or alwaysfract:
            fract = "%.03f" % rawsecs
            if hours or alwayshours:
                return "%02d:%02d:%02d%s%s" % (hours, mins, secs, fractdelim, fract[2:])
            else:
                return "%02d:%02d%s%s" % (mins, secs, fractdelim, fract[2:])
        else:
            if hours or alwayshours:
                return "%02d:%02d:%02d" % (hours, mins, secs)
            else:
                return "%02d:%02d" % (mins, secs)
    else:
        secs = round(rawsecs)
        if hours or alwayshours:
            return "%02d:%02d:%02d" % (hours, mins, secs)
        else:
            return "%02d:%02d" % (mins, secs)
def ffmpeg_get_info(url, data=None):
    sizepat = re.compile(r"Video:.*?(\d\d+)x(\d\d+)")  # hack: avoids 0x445 by requiring 2 or more nums

    def extract_size(text):
        m = sizepat.search(text)
        if m:
            return [int(x) for x in m.groups()]
        return (None, None)

    """
    Stream #0:0(eng): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, mono, fltp, 124 kb/s (default)
    Stream #0:0: Audio: vorbis, 44100 Hz, stereo, fltp, 192 kb/s
    Stream #0:1: Audio: vorbis, 44100 Hz, stereo, fltp, 499 kb/s
    """
    # nb: flags belong in re.compile; re.finditer's second positional argument is a start position
    audiopat = re.compile(r"Stream #0:(?P<sid>\d+)(.*) Audio: (?P<description>.+)", re.M)

    def extract_audio_streams(text):
        for m in audiopat.finditer(text):
            yield m.groupdict()

    videopat = re.compile(r"Stream #0:(?P<sid>\d+)(.*) Video: (?P<description>.+)", re.M)

    def extract_video_streams(text):
        for m in videopat.finditer(text):
            yield m.groupdict()

    def extract_metadata(text):
        ret = {}
        for line in text.splitlines():
            if ':' in line:
                (name, value) = line.split(':', 1)
                if not name.endswith("http") and (name.upper() == name):
                    ret[name.strip().lower()] = value.strip()
        return ret

    timecodepat = re.compile(r"Duration: (\d+):(\d+):(\d+)\.(\d+)")

    def extract_duration(text):
        m = timecodepat.search(text)
        if m:
            parts = m.groups()
            return (int(parts[0]) * 3600) + (int(parts[1]) * 60) + int(parts[2]) + float("0." + parts[-1])

    # ffmpeg prints stream info on stderr when given only an input
    popen = subprocess.Popen(["ffmpeg", "-i", url], stderr=subprocess.PIPE)
    o = popen.communicate()[1].decode("utf-8", errors="replace")
    if data is None:
        data = {}
    dur = extract_duration(o)
    if dur and dur >= 0.05:
        data['duration'] = dur
    size = extract_size(o)
    if size and size[0] is not None:
        data['framesize'] = "{0}x{1}".format(*size)
        data['width'] = size[0]
        data['height'] = size[1]
    for video in extract_video_streams(o):
        data['video'] = video['description']
        data['mediatype'] = "video"
    for audio in extract_audio_streams(o):
        data['audio'] = audio['description']
        if 'mediatype' not in data:
            data['mediatype'] = "audio"
    # data['metadata'] = extract_metadata(o)
    return data
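# Sketch of a typical return value for a video file (values illustrative):
# {"duration": 12.48, "framesize": "1280x720", "width": 1280, "height": 720,
#  "video": "h264 (High) ...", "audio": "aac (LC) ...", "mediatype": "video"}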
##############################
def pdf_get_info(url, d=None):
    # nb: the helpers below were copied from ffmpeg_get_info and are unused here
    sizepat = re.compile(r"Video:.*?(\d\d+)x(\d\d+)")  # hack: avoids 0x445 by requiring 2 or more nums

    def extract_size(text):
        m = sizepat.search(text)
        if m:
            return [int(x) for x in m.groups()]
        return (None, None)

    def extract_metadata(text):
        ret = {}
        for line in text.splitlines():
            if ':' in line:
                (name, value) = line.split(':', 1)
                if not name.endswith("http") and (name.upper() == name):
                    ret[name.strip().lower()] = value.strip()
        return ret

    timecodepat = re.compile(r"Duration: (\d+):(\d+):(\d+)\.(\d+)")

    def extract_duration(text):
        m = timecodepat.search(text)
        if m:
            parts = m.groups()
            return (int(parts[0]) * 3600) + (int(parts[1]) * 60) + int(parts[2]) + float("0." + parts[-1])

    # async def pdf_info_from_data (data):
    #     p = await asyncio.create_subprocess_exec("pdfinfo", "-", stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE)
    #     stdout, stderr = await p.communicate(data)
    #     stdout = stdout.decode("utf-8", errors="replace")
    #     d = {}
    #     for line in stdout.splitlines():
    #         if ":" in line:
    #             name, value = line.split(":", 1)
    #             name = name.strip()
    #             value = value.strip()
    #             d[name] = value
    #     return d

    popen = subprocess.Popen(["pdfinfo", url], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    o = popen.communicate()[0].decode("utf-8", errors="replace")
    if d is None:
        d = {}
    d['mediatype'] = "pdf"
    for line in o.splitlines():
        if ":" in line:
            name, value = line.split(":", 1)
            name = name.strip().lower()
            value = value.strip()
            d[name] = value
    return d
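# pdfinfo prints one "Name: value" per line, so a result looks roughly like this
# (keys lowercased by the loop above; values illustrative):
# {"mediatype": "pdf", "title": "...", "pages": "12", "page size": "595 x 842 pts (A4)"}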
#############
def innerHTML(elt):
    if elt.text is not None:
        ret = elt.text
    else:
        ret = u""
    return ret + u"".join([ET.tostring(x, method="html", encoding="unicode") for x in elt])
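# For example (illustrative):
#   innerHTML(ET.fromstring("<div>hello <b>world</b></div>"))  -> 'hello <b>world</b>'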
def make_folder_meta(target, source, env):
    """
    Main Builder Action, constructs .index/.index.json from:
    * contained files themselves, e.g. [ *FILE*, ... ]
    * file metadata, e.g. [ *.index/FILE/metadata.json*, ... ]
    * contained folders via their [ *FOLDER/.index/.index.json*, ... ]
    """
    # siteroot = env.Dictionary()['siteroot']
    tpath = target[0].path
    path = os.path.dirname(os.path.dirname(tpath))
    d = {}
    d['type'] = "directory"
    d['filename'] = os.path.split(path)[1]
    d['id'] = urlquote(d['filename'], errors="surrogatepass") + "/"
    files = 0
    folders = 0
    total_bytes = 0
    total_files = 0
    total_folders = 0
    total_files_by_ext = {}
    last_modification = None
    cc = []
    contents_by_id = {}
    meta_patches = []
    description = None
    for src in source:
        if os.path.basename(src.path) == "description.json":
            with open(src.path) as f:
                description = json.load(f)
            for meta in description:
                if 'id' in meta:
                    if meta['id'] == '' and 'description' in meta:
                        d['description'] = meta['description']
                    meta_patches.append((meta['id'], meta))
        elif os.path.basename(src.path) == ".index.json":
            folders += 1
            cd = {}
            cc.append(cd)
            cd['type'] = "directory"
            with open(src.path) as cdinfof:
                cdinfo = json.load(cdinfof)
            cd['id'] = cdinfo['id']
            contents_by_id[cd['id']] = cd
            cd['filename'] = cdinfo['filename']
            # inherit / copy the description
            # if 'description' in cdinfo:
            #     cd['description'] = cdinfo['description']
            cd['total_bytes'] = cdinfo['total_bytes']
            cd['total_files'] = cdinfo['total_files']
            cd['total_folders'] = cdinfo['total_folders']
            total_bytes += cdinfo['total_bytes']
            total_folders += cdinfo['total_folders']
            total_files += cdinfo['total_files']
            for ext in cdinfo['total_files_by_ext']:
                if ext not in total_files_by_ext:
                    total_files_by_ext[ext] = {'count': 0, 'total_bytes': 0}
                total_files_by_ext[ext]['count'] += cdinfo['total_files_by_ext'][ext]['count']
                total_files_by_ext[ext]['total_bytes'] += cdinfo['total_files_by_ext'][ext]['total_bytes']
            cd['link'] = urlquote(os.path.relpath(src.path, path), errors="replace")
            cd_mtime_dt = None
            if 'last_modification' in cdinfo:
                cd['last_modification'] = cdinfo['last_modification']
                cd_mtime_dt = datetime.datetime.fromisoformat(cdinfo['last_modification'])
            if cd_mtime_dt is not None and (last_modification is None or cd_mtime_dt > last_modification):
                last_modification = cd_mtime_dt
        elif os.path.dirname(src.path) == path:
            files += 1
            cd = {}
            cc.append(cd)
            cd['type'] = "file"
            cd['filename'] = os.path.split(src.path)[1]
            cd['id'] = urlquote(cd['filename'], errors="surrogatepass")
            contents_by_id[cd['id']] = cd
            cd_mtime_dt = datetime.datetime.fromtimestamp(os.stat(src.path, follow_symlinks=False).st_mtime)
            cd['mtime'] = cd_mtime_dt.isoformat()
            cd['ext'] = os.path.splitext(src.path)[-1].lstrip(".").lower()
            cd['size'] = os.path.getsize(src.path)
            total_bytes += cd['size']
            if cd['ext'] not in total_files_by_ext:
                total_files_by_ext[cd['ext']] = {'count': 0, 'total_bytes': 0}
            total_files_by_ext[cd['ext']]['count'] += 1
            total_files_by_ext[cd['ext']]['total_bytes'] += cd['size']
            if last_modification is None or cd_mtime_dt > last_modification:
                last_modification = cd_mtime_dt
        elif src.path.endswith(".json"):
            # Try to read / merge metadata
            # .../.index/FILE/metadata.json
            # TODO: code to match on pattern and check that file exists
            # make extension handling a sub branch to handle json + md
            mdir, meta_filename = os.path.split(src.path)
            mdir, filename = os.path.split(mdir)
            with open(src.path) as f:
                meta = json.load(f)
            if 'id' in meta:
                # meta_by_id[meta['id']] = meta
                meta_patches.append((meta['id'], meta))
        else:
            print(f"make_folder_meta: {path} unrecognized source file {src.path}")
    # if meta_by_id:
    #     # Attempt to merge meta data nodes
    #     for meta_id in meta_by_id:
    #         if meta_id in contents_by_id:
    #             # print (f"Merging {meta_id}")
    #             contents_by_id[meta_id].update(meta_by_id[meta_id])
    for meta_id, meta in meta_patches:
        if meta_id in contents_by_id:
            # print (f"Merging {meta_id}, {meta}")
            contents_by_id[meta_id].update(meta)
    cc.sort(key=lambda x: x['filename'].lower())
    d['contents'] = cc
    d['total_bytes'] = total_bytes
    d['files'] = files
    d['folders'] = folders
    d['total_folders'] = total_folders + folders  # nb: totals include this folder's direct contents as well
    d['total_files'] = total_files + files
    d['total_files_by_ext'] = total_files_by_ext
    if last_modification:
        d['last_modification'] = last_modification.isoformat()
    os.makedirs(os.path.split(tpath)[0], exist_ok=True)
    with open(tpath, "w") as f:
        json.dump(d, f, indent=2)
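# description.json is expected to be a JSON list of patch objects keyed by "id",
# where an empty id describes the folder itself (shape inferred from the loop above):
# [ {"id": "", "description": "About this folder"},
#   {"id": "photo.jpg", "description": "A caption for photo.jpg"} ]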
FolderMeta = Builder(action=make_folder_meta)
def ffmpeg_get_meta(target, source, env):
    d = ffmpeg_get_info(source[0].path)
    d['id'] = urlquote(os.path.basename(source[0].path), errors="surrogatepass")

    def relurl(x):
        # make the path relative to the folder that contains the .index directory
        return urlquote(os.path.relpath(str(x), os.path.dirname(os.path.dirname(os.path.dirname(target[0].path)))), errors="surrogatepass")

    for s in source[1:]:
        basename = os.path.basename(str(s))
        if basename == "thumb.png":
            d['thumbnail'] = relurl(s)
        elif basename in ("play.mp4", "play.mp3"):
            d['play'] = relurl(s)
        elif basename == "poster.png":
            d['poster'] = relurl(s)
    os.makedirs(os.path.dirname(target[0].path), exist_ok=True)
    with open(target[0].path, "w") as f:
        json.dump(d, f, indent=2)

FFMPEGMeta = Builder(action=ffmpeg_get_meta)
# VideoPoster = Builder(action="""
# mkdir -p `dirname $TARGET` && \
# ffmpeg -i $SOURCE -ss 3 -vframes 1 $TARGET
# """.strip())
def make_video_poster(target, source, env):
    os.makedirs(os.path.dirname(target[0].path), exist_ok=True)
    ret = subprocess.run(["ffmpeg", "-i", source[0].path, "-ss", "3", "-vframes", "1", target[0].path])
    if ret.returncode != 0:
        # fall back to ImageMagick's built-in "wizard:" placeholder image
        subprocess.run(["convert", "wizard:", "-auto-orient", "-resize", "320x320", "-flatten", target[0].path])

VideoPoster = Builder(action=make_video_poster)
# VideoThumb = Builder(action="""
# mkdir -p `dirname $TARGET` && \
# ffmpeg -i $SOURCE -ss 3 -vframes 1 $TARGET && \
# mogrify -resize 200x200 $TARGET
# """.strip())
def make_video_thumb(target, source, env):
    os.makedirs(os.path.dirname(target[0].path), exist_ok=True)
    ret = subprocess.run(["ffmpeg", "-i", source[0].path, "-ss", "3", "-vframes", "1", target[0].path])
    if ret.returncode == 0:
        subprocess.run(["mogrify", "-resize", "200x200", target[0].path])
    else:
        # fall back to ImageMagick's built-in "wizard:" placeholder image
        subprocess.run(["convert", "wizard:", "-auto-orient", "-resize", "200x200", "-flatten", target[0].path])

VideoThumb = Builder(action=make_video_thumb)
# ImagePoster = Builder(action="""
# mkdir -p $TARGET.dir && \
# convert $SOURCE[0] -auto-orient -resize 640x640 -flatten $TARGET || convert wizard: -resize 640x640 $TARGET
# """.strip())
def make_image_poster(target, source, env):
    os.makedirs(os.path.dirname(target[0].path), exist_ok=True)
    # "[0]" selects the first frame/page of multi-frame formats (gif, pdf, ...)
    ret = subprocess.run(["convert", source[0].path + "[0]", "-auto-orient", "-resize", "640x640", "-flatten", target[0].path])
    if ret.returncode != 0:
        subprocess.run(["convert", "wizard:", "-auto-orient", "-resize", "640x640", "-flatten", target[0].path])

ImagePoster = Builder(action=make_image_poster)
# ImageThumb = Builder(action="""
# mkdir -p $TARGET.dir && \
# convert $SOURCE[0] -auto-orient -resize 200x200 -flatten $TARGET || convert wizard: -resize 640x640 $TARGET
# """.strip())
def make_image_thumb(target, source, env):
    os.makedirs(os.path.dirname(target[0].path), exist_ok=True)
    ret = subprocess.run(["convert", source[0].path + "[0]", "-auto-orient", "-resize", "200x200", "-flatten", target[0].path])
    if ret.returncode != 0:
        subprocess.run(["convert", "wizard:", "-auto-orient", "-resize", "200x200", "-flatten", target[0].path])

ImageThumb = Builder(action=make_image_thumb)
def make_image_meta(target, source, env):
    d = image_get_info(source[0].path)
    d['id'] = urlquote(os.path.basename(source[0].path), errors="surrogatepass")

    def relurl(x):
        return urlquote(os.path.relpath(str(x), os.path.dirname(os.path.dirname(os.path.dirname(target[0].path)))), errors="surrogatepass")

    for s in source[1:]:
        basename = os.path.basename(str(s))
        if basename in ("thumb.jpg", "thumb.png"):
            d['thumbnail'] = relurl(s)
        elif basename in ("poster.jpg", "poster.png"):
            d['poster'] = relurl(s)
    os.makedirs(os.path.dirname(target[0].path), exist_ok=True)
    with open(target[0].path, "w") as f:
        json.dump(d, f, indent=2)

ImageMeta = Builder(action=make_image_meta)
def make_pdf_meta(target, source, env):
    d = pdf_get_info(source[0].path)
    d['id'] = urlquote(os.path.basename(source[0].path), errors="surrogatepass")

    def relurl(x):
        return urlquote(os.path.relpath(str(x), os.path.dirname(os.path.dirname(os.path.dirname(target[0].path)))), errors="surrogatepass")

    for s in source[1:]:
        basename = os.path.basename(str(s))
        if basename in ("thumb.jpg", "thumb.png"):
            d['thumbnail'] = relurl(s)
        elif basename in ("poster.jpg", "poster.png"):
            d['poster'] = relurl(s)
    os.makedirs(os.path.dirname(target[0].path), exist_ok=True)
    with open(target[0].path, "w") as f:
        json.dump(d, f, indent=2)

PDFMeta = Builder(action=make_pdf_meta)
def add_attribute_to_links(src, attrname, attrvalue):
    t = html5lib.parseFragment(src, treebuilder="etree", namespaceHTMLElements=False)
    for a in t.findall(".//a"):
        a.attrib[attrname] = attrvalue
    return innerHTML(t)
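# For example (illustrative; attribute order may vary):
#   add_attribute_to_links('<p><a href="#">x</a></p>', "rel", "nofollow")
#   -> '<p><a href="#" rel="nofollow">x</a></p>'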
def template_action(target, source, env):
    envd = env.Dictionary()
    tpath, tname = os.path.split(envd.get("INDEX_TEMPLATE") or "templates/index.html")
    rootpath = os.path.abspath(envd.get("SCONS_ROOT"))  # nb: SCONS_ROOT must be set
    index_scripts = (envd.get("INDEX_SCRIPTS") or "").split(":")
    index_stylesheets = (envd.get("INDEX_STYLESHEETS") or "").split(":")
    index_scripts = [x for x in index_scripts if x]  # eliminate blank elements
    index_stylesheets = [x for x in index_stylesheets if x]
    # print (f"template_action, rootpath: {rootpath}")
    jenv = jinja2.Environment(loader=jinja2.FileSystemLoader(tpath))
    jenv.filters['strftime'] = lambda x, format='%Y-%m-%d %H:%M:%S': datetime.datetime.fromisoformat(x).strftime(format)
    jenv.filters['filename2title'] = lambda x: os.path.splitext(x)[0].replace("_", " ")
    jenv.filters['add_attribute_to_links'] = add_attribute_to_links
    md = markdown.Markdown(extensions=['meta'])
    jenv.filters['markdown'] = lambda text: Markup(md.convert(text))
    jenv.filters['wbr_'] = lambda x: x.replace("_", "_<wbr>")
    jenv.filters['isotime'] = lambda x: datetime.datetime.fromtimestamp(x).isoformat()
    jenv.filters['isoduration'] = lambda x: isodate.duration_isoformat(datetime.timedelta(0, x))
    jenv.filters['timecode'] = timecode_fromsecs
    template = jenv.get_template(tname)
    # calc breadcrumbs...
    # example breadcrumbs
    # [
    #     {'relpath': '../../', 'name': '/'},
    #     {'relpath': '../', 'name': 'Constant_V'},
    #     {'relpath': '', 'name': 'videos'},
    # ]
    path = os.path.dirname(os.path.dirname(source[0].path))
    rpath = os.path.relpath(os.path.abspath(path), rootpath)
    if rpath == ".":
        bc = [{'relpath': '', 'name': '/'}]
    else:
        rpath = rpath.split(os.sep)
        rpath.insert(0, '/')
        bc = [{'relpath': '../' * (len(rpath) - i - 1), 'name': name} for i, name in enumerate(rpath)]
    # print (f"path: {path}, breadcrumbs: {bc}")
    # combine source json
    with open(source[0].path) as fin:
        data = json.load(fin)
    # calc allkeys
    allkeys = set()
    for c in data['contents']:
        for key in c:
            allkeys.add(key)
    data['allkeys'] = allkeys
    data['breadcrumbs'] = bc
    data['scripts'] = index_scripts
    data['stylesheets'] = index_stylesheets
    with open(target[0].path, "wb") as fout:
        fout.write(template.render(**data).encode("utf-8", errors="surrogateescape"))
        # fout.write(template.render(**data).encode("utf-8", errors="replace"))
# Template = Builder(action=template_action)
Template = Builder(action=Action(template_action, varlist=("INDEX_TEMPLATE", "INDEX_SCRIPTS", "INDEX_STYLESHEETS", "SCONS_ROOT")))
### Perform the build
builders = {
    "FolderMeta": FolderMeta,
    "VideoPoster": VideoPoster,
    "VideoThumb": VideoThumb,
    "ImageThumb": ImageThumb,
    "ImagePoster": ImagePoster,
    "PDFMeta": PDFMeta,
    "ImageMeta": ImageMeta,
    "Template": Template,
    "FFMPEGMeta": FFMPEGMeta,
}
env = Environment(BUILDERS=builders)
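# Builders registered this way become environment methods, invoked per node
# further below, e.g. (a sketch with hypothetical paths):
#   env.ImageThumb("photos/.index/cat.jpg/thumb.png", "photos/cat.jpg")
#   env.Template("photos/index.html", "photos/.index/.index.json")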
# env['ESCAPE']=escape
# env.Append(BUILDERS=builders)
# def decide_if_file_not_exists (dependency, target, prev_ni, repo_node=None):
#     return not os.path.exists(str(target))
# env2 = Environment(BUILDERS=builders)
# env2 = env.Clone()
# env2.Decider(decide_if_file_not_exists)
# def metadata_for_folder (p):
#     if os.path.isdir(os.path.join(path, p)):
#         return os.path.join(p, ".index", ".index.json")
#     return p
def ignore_p(path, ignores):
    # nb: the ignores argument is currently unused
    filename = os.path.basename(path)
    if filename.startswith("."):
        return True
    if filename in ("index.html", "description.json", "__pycache__", "venv"):
        return True
    return False
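# For example:
#   ignore_p("media/.DS_Store", None)   -> True   (dotfiles)
#   ignore_p("media/index.html", None)  -> True   (generated output)
#   ignore_p("media/clip.mp4", None)    -> False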
def depwalk (path, base_path=None, ignores=None):