Commit 80c03693 authored by Alexandre Leray
Browse files

File formatting to make it more readable

parent 10fd5e2a
......@@ -29,9 +29,12 @@ as an exception, deal with leading numbers a la SRT?
either the URL, the TIMECODE, or both need to be present
"""
import sys, re, cgi # for cgi.escape of titles
import cgi # for cgi.escape of titles
import re
import sys
import timecode
splitpat = re.compile(
r"""^
(?# OPTION 1. URL + optionally a timecode )
......@@ -42,7 +45,7 @@ splitpat = re.compile(
\s* --> \s*
(?P<urlend> ((\d\d):)? (\d\d): (\d\d) ([,.]\d{1,3})?)?)?
\s*)
|
(?# OPTION 2. just a timecode )
......@@ -53,10 +56,11 @@ splitpat = re.compile(
(?P<end> ((\d\d):)? (\d\d): (\d\d) ([,.]\d{1,3})?)?
\s*)
$""",
re.X|re.M
re.X | re.M
)
def split (text):
def split(text):
pos = 0
bodies = []
heads = [None]
......@@ -69,6 +73,7 @@ def split (text):
bodies.append(text[pos:])
return zip(heads, bodies)
def parse(text):
"""
returns [({headers}, "body")]
......@@ -76,13 +81,15 @@ def parse(text):
seq, url, [hash], start, [start_tc], end, [end_tc]
"""
if hasattr(text, "read"): text = text.read()
if hasattr(text, "read"):
text = text.read()
ret = []
for (h, b) in split(text):
b = b.strip()
if (h == None and b == ""): continue
if (h == None and b == ""):
continue
if h == None:
h = {'url': None, 'start': None, 'end': None}
elif 'url' in h and h["url"] != None:
......@@ -95,7 +102,7 @@ def parse(text):
(h['url'], h['hash']) = svals
else:
(h['url'], h['hash']) = (url, None)
if h:
if h['start']:
h['start_tc'] = h['start']
......@@ -103,11 +110,12 @@ def parse(text):
if h['end']:
h['end_tc'] = h['end']
h['end'] = timecode.timecode_tosecs(h['end'])
ret.append((h,b))
return ret
ret.append((h, b))
return ret
if (__name__ == "__main__"):
test = """
......@@ -117,7 +125,7 @@ if (__name__ == "__main__"):
# for m in urlpat.finditer(test.strip()):
# pprint (m.groupdict())
# for m in splitpat.finditer(sys.stdin.read()):
# print "match"
# print m.groupdict()
......@@ -127,4 +135,4 @@ if (__name__ == "__main__"):
for (h, b) in parse(sys.stdin.read()):
print h
print b
print "="*20
print "=" * 20
......@@ -18,7 +18,8 @@
# FFMpeg
import os, sys
import os
import sys
import re
# import popen2
from settings import FFMPEGPATH, CONVERTPATH, MEDIA_ROOT, MEDIA_URL
......@@ -26,11 +27,14 @@ from subprocess import Popen, PIPE, STDOUT
# Debug switch: when truthy, do() prints the ffmpeg parameters and each output
# line it reads, and normalize() prints the command it is about to run.
DEBUG = 0
def streamInfoGetComponent(sinfo, cname):
    """Return the value stored for the first record named ``cname``.

    ``sinfo`` is iterated as a sequence of records; element 0 of each record
    is compared against ``cname`` and element 1 of the first match is
    returned. None is returned when no record matches.
    """
    for record in sinfo:
        if record[0] != cname:
            continue
        return record[1]
    return None
def streamInfoGetVideoSize (sinfo):
def streamInfoGetVideoSize(sinfo):
vidinfo = streamInfoGetComponent(sinfo, 'video')
if (vidinfo != None):
parts = vidinfo.split(",")
......@@ -41,7 +45,8 @@ def streamInfoGetVideoSize (sinfo):
return map(lambda x: int(x), ret)
return (-1, -1)
def getStreamInfo (path):
def getStreamInfo(path):
## HACK TO FIX WEIRD LOCAL FILE SEGFAULT...
## turn a local URL into a local path
if path.startswith(MEDIA_URL):
......@@ -50,33 +55,37 @@ def getStreamInfo (path):
ret = {}
results = do('-i "%s"' % path)
duration = 0
for line in results.splitlines():
line = line.strip().lower()
results = re.search("duration:\s*(\d\d):(\d\d):(\d\d)\.(\d+)?", line, re.IGNORECASE)
results = re.search("duration:\s*(\d\d):(\d\d):(\d\d)\.(\d+)?", \
line, re.IGNORECASE)
if results:
results = results.groups()
duration = (int(results[0]) * 60 * 60) + (int(results[1]) * 60) + int(results[2])
duration = (int(results[0]) * 60 * 60) + (int(results[1]) * 60) \
+ int(results[2])
if len(results) > 3:
duration = float(str(duration)+"."+results[3])
duration = float(str(duration) + "." + results[3])
else:
duration = float(duration)
results = re.search(r"stream #(\d+)\.(\d+).*(audio|video): (.*)", line, re.IGNORECASE)
results = re.search(r"stream #(\d+)\.(\d+).*(audio|video): (.*)", \
line, re.IGNORECASE)
if results:
results = results.groups()
stream_num = int(results[1])
type = results[2]
info = results[3]
ret[type] = info
# FURTHER EXTRACT DATA
# DURATION
if (duration > 0.0): ret['duration'] = duration
if (duration > 0.0):
ret['duration'] = duration
# VIDEO
if ret.has_key('video'):
if 'video' in ret:
# SIZE (width, height)
sizepat = r"(\d+)x(\d+),?\s*"
r = re.search(sizepat, ret['video'])
......@@ -85,7 +94,7 @@ def getStreamInfo (path):
ret['height'] = int(r.group(2))
# remove the size from the video info string
ret['video'] = re.sub(sizepat, "", ret['video'])
# FRAMERATE (fps)
fpspat = r"(\d+(\.\d+)?)\s*fps(\(.*\))?\s*"
r = re.search(fpspat, ret['video'])
......@@ -98,16 +107,21 @@ def getStreamInfo (path):
return ret
def do (params):
""" added limiter on repeated lines in response to an apparent ffmpeg bug that seems to loop outputting an error message """
def do(params):
""" added limiter on repeated lines in response to an apparent ffmpeg bug
that seems to loop outputting an error message """
cmd = FFMPEGPATH + ' ' + params
if DEBUG: print "FFMPEG", params
if DEBUG:
print "FFMPEG", params
# subprocess = popen2.Popen4(cmd)
# pout = subprocess.fromchild
# pin = subprocess.tochild
p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, \
close_fds=True)
(pin, pout) = (p.stdin, p.stdout)
results = ''
......@@ -115,11 +129,14 @@ def do (params):
repeatcount = 0
while 1:
outline = pout.readline()
if not outline: break
if DEBUG: print "LINE:", outline
if not outline:
break
if DEBUG:
print "LINE:", outline
if (outline == lastline):
repeatcount += 1
if repeatcount > 10: break
if repeatcount > 10:
break
else:
results += outline
lastline = outline
......@@ -128,18 +145,22 @@ def do (params):
pout.close()
os.kill(p.pid, 1)
return results
return results
def normalize(params):
    """Run the normalize tool with ``params`` and wait for it to finish.

    The command line is NORMALIZEPATH followed by ``params``; it is started
    with os.popen and its output is read line by line until EOF and thrown
    away. Always returns None.
    """
    # NOTE(review): NORMALIZEPATH is not among the names imported from
    # settings in the visible part of this module -- confirm it is defined.
    command = "%s %s" % (NORMALIZEPATH, params)
    if DEBUG:
        print(command)
    pipe = os.popen(command)
    # Drain the pipe; readline() returns an empty string at EOF.
    while pipe.readline():
        pass
    pipe.close()
    return
def getThumbnail(path, size="160x120", time=None):
# size is parameter to "convert -resize "
# so imagemagick options (like !) can be used
......@@ -167,17 +188,18 @@ def getThumbnail(path, size="160x120", time=None):
# original shell pipeline:
# ffmpeg -i input.mp4 -ss 15 -an -t 0:0:0.001 -f image2 - 2> /dev/null |\
# convert jpg:- -resize 160x120 jpg:-
# convert jpg:- -resize 160x120 jpg:-
# version for shell=False if for some reason that is necessary...
# cmd1 = [settings.FFMPEGPATH, "-i", path, "-ss", time, "-an","-t","0:0:0.001","-f","image2","-"]
# p1 = Popen(cmd1, stdout=PIPE, stderr=open('/dev/null', 'w'))
# cmd1 = FFMPEGPATH + " -i \"%s\" -ss %s -an -t 0:0:0.001 -f image2 - 2> /dev/null"
cmd1 = FFMPEGPATH + " -i \"%s\" -ss %s -an -dframes 1 -f image2 - 2> /dev/null"
cmd1 = FFMPEGPATH + \
" -i \"%s\" -ss %s -an -dframes 1 -f image2 - 2> /dev/null"
cmd1 %= (path, str(time))
p1 = Popen(cmd1, stdout=PIPE, shell=True)
cmd2 = CONVERTPATH + " jpg:- -resize %s jpg:- 2> /dev/null"
cmd2 %=(size)
cmd2 %= (size)
p2 = Popen(cmd2, stdin=p1.stdout, stdout=PIPE, shell=True)
return p2.communicate()[0]
......@@ -187,8 +209,7 @@ if (__name__ == "__main__"):
try:
src = sys.argv[1]
except IndexError:
src="Videos/jodi.VOB"
src = "Videos/jodi.VOB"
mi = getStreamInfo(src)
print mi
......@@ -25,32 +25,41 @@ except ImportError:
import json
import re, urlparse, urllib
import re
import urlparse
import urllib
# from aawiki.models import name_urlescape
from django.core.urlresolvers import reverse
# Matches the characters ("?" and "+") that name_urlescape passes through
# urllib.quote when building the URL form of a name.
wikify_protectedchars = re.compile(r"[?+]", re.I)
# re.compile(r"[^a-zA-Z0-9\-()_]", re.I)
def name_normalize(n):
    """Normalize a name.

    Surrounding whitespace is stripped and the first remaining character is
    upper-cased; the rest of the string is left as it is. A string that is
    empty after stripping is returned empty.
    """
    stripped = n.strip()
    if not stripped:
        return stripped
    return stripped[0].upper() + stripped[1:]
def name_urlescape(n):
    """Convert a display name to its URL form.

    The name is normalized with name_normalize, spaces are replaced by
    underscores, and every character matched by wikify_protectedchars is
    replaced by its urllib.quote escape.
    """
    underscored = name_normalize(n).replace(" ", "_")
    return wikify_protectedchars.sub(
        lambda found: urllib.quote(found.group(0)), underscored)
def name_urlunescape (n):
def name_urlunescape(n):
""" url to normal """
n = n.replace("_", " ")
if (type(n) == unicode):
......@@ -58,6 +67,7 @@ def name_urlunescape (n):
n = urllib.unquote(n)
return n
class LinkForm:
"""
full form:
......@@ -75,14 +85,14 @@ class LinkForm:
"""
# this pattern failed on two consecutive links that could be read as one
# pat = re.compile(r"\[\[\s*(?:(.+?)\s*::)?\s*(?:(.+?)\s*:)?\s*(.+?)\s*(?:\|\s*(.+?)\s*)?\s*\]\]")
# pat = re.compile(r"\[\[\s*(?:(.+?)\s*::)?\s*(?:(.+?)\s*:)?\s*(.+?)\s*(?:\|\s*(.+?)\s*)?\s*\]\]")
# thus . => [^\]] (anything but a close bracket), in the spirit of minimally limiting.
# pat = re.compile(r"\[\[\s*(?:(?P<rel>[^\]]+?)\s*::)?\s*(?:(?P<namespace>[^\]]+?)\s*:)?\s*(?P<item>[^\]]+?)\s*(?:\|\s*(?P<label>[^\]]+?)\s*)?\s*\]\]")
# pat = re.compile(r"\[\[\s*(?:(?P<rel>[^\]#]+?)\s*::)?\s*(?:(?P<namespace>[^\]#]+?)\s*:)?\s*(?P<item>[^\]#]+?)\s*(?:#\s*(?P<fragment>[^\]]+?))?\s*(?:\|\s*(?P<label>[^\]]+?)\s*)?\s*\]\]")
# pat = re.compile(r"\[\[\s*(?:(?P<rel>[^\]]+?)\s*::)?\s*(?:(?P<namespace>[^\]]+?)\s*:)?\s*(?P<item>[^\]]+?)\s*(?:\|\s*(?P<label>[^\]]+?)\s*)?\s*\]\]")
# pat = re.compile(r"\[\[\s*(?:(?P<rel>[^\]#]+?)\s*::)?\s*(?:(?P<namespace>[^\]#]+?)\s*:)?\s*(?P<item>[^\]#]+?)\s*(?:#\s*(?P<fragment>[^\]]+?))?\s*(?:\|\s*(?P<label>[^\]]+?)\s*)?\s*\]\]")
# rel, item, frag, label
# pat = re.compile(r"\[\[\s*(?P<contents>(?:(?P<rel>[^\]#]+?)\s*::?)?\s*(?P<item>[^\]#]+?)\s*(?:#\s*(?P<fragment>[^\]]+?))?\s*(?:\|\s*(?P<label>[^\]]+?)\s*)?)\s*\]\]")
# pat = re.compile(r"\[\[\s*(?P<contents>(?:(?P<rel>[^\]#]+?)\s*::?)?\s*(?P<item>[^\]#]+?)\s*(?:#\s*(?P<fragment>[^\]]+?))?\s*(?:\|\s*(?P<label>[^\]]+?)\s*)?)\s*\]\]")
pat = re.compile(r"""\[\[ \s*
(?P<contents>
(?:(?P<rel>[^\]#]+?) \s* ::?)? \s*
......@@ -90,7 +100,7 @@ class LinkForm:
(?:\# \s* (?P<fragment>[^\]]+?) )? \s*
(?:\| \s* (?P<label>[^\]]+?) \s*)?
) \s*
\]\]""", re.X)
\]\]""", re.X)
# White space-preserving pattern
ppat = re.compile(r"""\[\[
......@@ -98,10 +108,10 @@ class LinkForm:
(?P<item> \s* (?P<item_name> [^\]#]+? ) \s* )
(?P<fragment> \# (?P<fragment_name> [^\]]+? ) \s* )?
(?P<label> \| \s* (?P<label_name> [^\]]+? ) \s*)?
\]\]""", re.X)
\]\]""", re.X)
# (?:#\s*(?P<fragment>[^\]]+?))?
def __init__ (self, match):
def __init__(self, match):
self.source = match.group(0)
d = match.groupdict()
# HACK! CHECK IF ORIGINAL looks like a URL (http only at the moment!)
......@@ -117,7 +127,7 @@ class LinkForm:
try:
hashpos = contents.rindex("#")
self.item = contents[:hashpos]
self.fragment = contents[hashpos+1:]
self.fragment = contents[hashpos + 1:]
except ValueError:
pass
return
......@@ -128,7 +138,7 @@ class LinkForm:
self.label = d['label']
# for (key, val) in match.groupdict().iteritems():
# setattr(self, key, val)
def unparse(self):
ret = u"[[ "
if (self.rel):
......@@ -141,21 +151,26 @@ class LinkForm:
ret += self.label + " "
ret += "]]"
return ret
def debug(self):
    """Return the link as a fixed-layout "[[ rel :: item # fragment | label ]]" string.

    Falsy rel, fragment and label values are shown as empty strings; item is
    inserted as it is.
    """
    fields = (
        self.rel or "",
        self.item,
        self.fragment or "",
        self.label or "",
    )
    return "[[ %s :: %s # %s | %s ]]" % fields
# linkpat = re.compile(r"\[\[[\w()|/ ]+\]\]")
def Link_parseText(text):
    """Return a list with one LinkForm for each LinkForm.pat match in ``text``."""
    return [LinkForm(found) for found in LinkForm.pat.finditer(text)]
def Link_sub(sub, text):
    """Substitute every LinkForm.pat match in ``text`` using ``sub``.

    ``sub`` is handed straight to the compiled pattern's ``sub`` method, so
    it may be a replacement string or a callable taking the match object.
    """
    link_pattern = LinkForm.pat
    return link_pattern.sub(sub, text)
def Link_markup_sub(match):
link = LinkForm(match)
label = link.label or link.item
......@@ -167,7 +182,8 @@ def Link_markup_sub(match):
return """<a href="%s" rel="%s" class="tag">%s</a>""" \
% (item, rel, label)
def Link_markup(text):
    """Return ``text`` with every wiki link replaced by its markup.

    Each LinkForm.pat match is replaced by the string that Link_markup_sub
    returns for it; the rest of the text is left unchanged. (The previous
    docstring claimed a list of Link objects was returned, which is what
    Link_parseText does, not this function.)
    """
    return LinkForm.pat.sub(Link_markup_sub, text)
......@@ -180,11 +196,11 @@ if __name__ == "__main__":
"""
def unparseFromDict(d):
return u"[[" + (d['rel'] or "") + (d['item'] or "") + \
(d['fragment'] or "") + (d['label'] or "") + u"]]"
def unparseFromDict (d):
return u"[[" + (d['rel'] or "") + (d['item'] or "") + (d['fragment'] or "") + (d['label'] or "") + u"]]"
def unparseNorm (m):
def unparseNorm(m):
d = m.groupdict()
parts = []
if d['rel_name']:
......@@ -196,19 +212,16 @@ if __name__ == "__main__":
parts.append("label: %s" % d['label_name'])
return u"<Link " + ", ".join(parts) + u">"
for m in LinkForm.ppat.finditer(text):
print unparseNorm(m)
d = m.groupdict()
print unparseFromDict(d)
def findAndReplaceLinkNames (oldname, newname, text):
def sub (match):
return "*"
return LinkForm.pat.sub(sub, text)
print findAndReplaceLinkNames ("", "", text)
def findAndReplaceLinkNames(oldname, newname, text):
def sub(match):
return "*"
return LinkForm.pat.sub(sub, text)
print findAndReplaceLinkNames("", "", text)
......@@ -21,7 +21,9 @@ SRT.py
classes to represent & parse SRT subRip subtitle files
"""
import sys, re, cgi # for cgi.escape of titles
import cgi # for cgi.escape of titles
import re
import sys
import timecode
## EXAMPLE SRT FILE:
......@@ -39,7 +41,8 @@ import timecode
# Matches a timing line of the form "[HH:]MM:SS[,mmm] --> [[HH:]MM:SS[,mmm]]"
# at the start of a line (re.M). Groups 1-4 are the start hours, minutes,
# seconds and separator-plus-fraction; group 5 is the whole optional end
# timecode and groups 6-9 are its hours, minutes, seconds and fraction.
timecode_pat = re.compile(r"^(?:(\d\d):)?(\d\d):(\d\d)([,.]\d\d\d)?\s*-->\s*((?:(\d\d):)?(\d\d):(\d\d)([,.]\d\d\d)?)?", re.M)
# Matches a string made up solely of one or more digits
# (presumably used for SRT sequence-number lines -- TODO confirm).
number = re.compile(r"^\d+$")
def get_timecodes (match):
def get_timecodes(match):
"""
translates a match object into tuple of 1 or 2 4-tuples as in:
(HH, MM, SS, FRAMES)
......@@ -48,15 +51,15 @@ def get_timecodes (match):
g = match.groups()
tc1 = None
tc2 = None
# tc1 is 0,1,2 + 3?
# tc2 is 5,6,7 + 8?
if g[3]:
if g[0]:
tc1 = (int(g[0]), int(g[1]), int(g[2]), int(g[3][1:]))
tc1 = (int(g[0]), int(g[1]), int(g[2]), int(g[3][1:]))
else:
tc1 = (0, int(g[1]), int(g[2]), int(g[3][1:]))
tc1 = (0, int(g[1]), int(g[2]), int(g[3][1:]))
else:
if g[0]:
tc1 = (int(g[0]), int(g[1]), int(g[2]), 0)
......@@ -67,50 +70,54 @@ def get_timecodes (match):
if g[8]:
if g[5]:
tc2 = (int(g[5]), int(g[6]), int(g[7]), int(g[8][1:]))
else:
tc2 = (0, int(g[6]), int(g[7]), int(g[8][1:]))
else:
tc2 = (0, int(g[6]), int(g[7]), int(g[8][1:]))
else:
if g[5]:
tc2 = (int(g[5]), int(g[6]), int(g[7]), 0)
tc2 = (int(g[5]), int(g[6]), int(g[7]), 0)
else:
tc2 = (0, int(g[6]), int(g[7]), 0)
tc2 = (0, int(g[6]), int(g[7]), 0)
if tc2:
return (tc1, tc2)
else:
# nb the importance of the extra comma to return a 1 element list!
return (tc1,)
def timecodelist_to_secs(tc):
    """Convert a timecode sequence to a number of seconds.

    ``tc`` is indexed as (hours, minutes, seconds[, fraction]). When it has
    exactly four elements, the fourth is turned into a string, left-padded
    with zeros to at least three characters and used as the digits after the
    decimal point (so 5 contributes 0.005), giving a float result. For any
    other length only the first three elements are used.
    """
    # print "TCUNPARSE: " + str(tc)
    has_fraction = len(tc) == 4
    whole = (tc[0] * 3600) + (tc[1] * 60) + tc[2]
    if not has_fraction:
        return whole
    # prepend leading zeros if necessary
    digits = str(tc[3]).rjust(3, "0")
    return whole + float("0." + digits)
def match_to_secs(m):
    """Return the timecodes of match ``m`` as a list of seconds.

    get_timecodes(m) supplies the timecode tuples, and each one is converted
    with timecodelist_to_secs.
    """
    return list(map(timecodelist_to_secs, get_timecodes(m)))
class SRTSequenceException(Exception):
    """Exception type declared by the SRT module.

    NOTE(review): presumably signals a problem with the order of titles;
    it is not raised in the visible part of the file -- confirm.
    """
class SRT:
def __init__(self):
self.titles = []
def unparse_xml(self, inCDATA=False):
    """Return all titles serialized between "<srt>" and "</srt>" lines.

    Each title contributes the result of its own unparse_xml(inCDATA), in
    the order the titles are stored.
    """
    pieces = ["<srt>\n"]
    pieces.extend(title.unparse_xml(inCDATA) for title in self.titles)
    pieces.append("</srt>\n")
    return "".join(pieces)
def unparse (self, number=True):
def unparse(self, number=True):
ret = ""
count = 1
for t in self.titles:
......@@ -120,7 +127,7 @@ class SRT:
ret += t.unparse()
return ret
def addTitle (self, text, start, end=None):
def addTitle(self, text, start, end=None):
"""
addTitle(text, start, [end])
start & end may be string timecodes or numeric seconds
......@@ -131,13 +138,13 @@ class SRT:
end = timecode.timecode_tosecs(end)
self.titles.append(Title(start, end, text))
def normalize_endtimes (self, duration = None):
def normalize_endtimes(self, duration=None):
"""
sets end times on titles
also enforce sorted order, non-overlapping, and
adding buffer titles as needed
duration, if given, is used for the final title
titles must have non-null start times
"""
......@@ -149,7 +156,7 @@ class SRT:
else:
starts.append(title)
starts.sort(key=lambda x: x.start)