Commit e2fd6347 authored by alexandre's avatar alexandre
Browse files

Cleaning, it seems. I decided to do a spring cleaning on my computer

and found the repo dirty. Committing for backup purposes. Untested.
parent 7cc928e9
from django.contrib import admin
from models import Page
class PageAdmin(admin.ModelAdmin):
    # Columns shown in the admin change-list for wiki pages.
    list_display = ("name", "content")
    # Fields searched by the admin search box.
    search_fields = ("name", "content")


# Expose Page in the Django admin with the configuration above.
admin.site.register(Page, PageAdmin)
# This file is part of Active Archives.
# Copyright 2006-2011 the Active Archives contributors (see AUTHORS)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Also add information on how to contact you by electronic and paper mail.
import re
from aawiki.mdx.mdx_sectionedit import (DATETIMECODE_HEADER_RE, spliterator)
from aawiki.timecode import (timecode_fromsecs, timecode_tosecs)
def audacity_to_srt(data, explicit=False):
    """Converts an Audacity label track into SRT-like timed sections.

    Each input line is ``start end [label]`` where the times are seconds
    with a comma as decimal separator.  When *explicit* is False, an end
    time that merely repeats the next section's start time is dropped.
    Literal ``\\n`` sequences in labels become real newlines.

    >>> data = '''90,610022 90,610022 first section
    ... 345,271874 345,271874 second section'''
    >>> print(audacity_to_srt(data).strip())
    00:01:30.610 -->
    <BLANKLINE>
    first section
    <BLANKLINE>
    00:05:45.272 -->
    <BLANKLINE>
    second section

    >>> data = '''90,610022 345,271874 first section
    ... 345,271874 512,573912 second section'''
    >>> print(audacity_to_srt(data).strip())
    00:01:30.610 -->
    <BLANKLINE>
    first section
    <BLANKLINE>
    00:05:45.272 --> 00:08:32.574
    <BLANKLINE>
    second section

    >>> print(audacity_to_srt(data, explicit=True).strip())
    00:01:30.610 --> 00:05:45.272
    <BLANKLINE>
    first section
    <BLANKLINE>
    00:05:45.272 --> 00:08:32.574
    <BLANKLINE>
    second section

    >>> data = '''90,610022 345,271874 first section
    ... 345,271874 512,573912'''
    >>> print(audacity_to_srt(data).strip())
    00:01:30.610 -->
    <BLANKLINE>
    first section
    <BLANKLINE>
    00:05:45.272 --> 00:08:32.574
    """
    stack = []
    for line in data.splitlines():
        try:
            (start, end, body) = tuple(line.split(None, 2))
        except ValueError:
            try:
                # A marker without label
                (start, end) = tuple(line.split(None, 1))
                body = ""
            except ValueError:
                # A blank line: stop processing (original behavior kept;
                # everything after the first blank line is discarded).
                break
        start = float(start.replace(',', '.'))
        end = float(end.replace(',', '.'))
        # NOTE(review): fractdelim=',' is requested here, yet the doctests
        # show '.' as the fraction delimiter -- verify against
        # aawiki.timecode.timecode_fromsecs.
        start = timecode_fromsecs(start, alwaysfract=True,
                                  alwayshours=True, fractdelim=',')
        end = timecode_fromsecs(end, alwaysfract=True,
                                alwayshours=True, fractdelim=',')
        # If the end time equals the start time we omit it.
        if end == start:
            end = ""
        if not explicit:
            # Deletes previous end time if equal to actual start time
            if stack and stack[-1]['end'] == start:
                stack[-1]['end'] = ""
        body = body.replace(r'\n', '\n')
        stack.append({'start': start, 'end': end, 'body': body})
    sections = []
    for e in stack:
        # rstrip() drops the trailing space the template used to leave
        # when the end time is empty, so headers match the doctests.
        header = ("%s --> %s" % (e['start'], e['end'])).rstrip()
        sections.append("%s\n\n%s\n\n" % (header, e['body']))
    return "".join(sections)
def srt_to_audacity(data, force_endtime=False):
    """Converts SRT-formatted text into Audacity label-track lines.

    Each output line is ``start<TAB>end<TAB>body``.  With *force_endtime*,
    an entry missing its end time is backfilled with the start time of
    the following entry; otherwise the start time itself is used.
    """
    # FIXME: UnicodeDecodeError...
    header_re = re.compile(DATETIMECODE_HEADER_RE, re.I | re.M | re.X)
    entries = []
    for chunk in spliterator(header_re, data, returnLeading=0):
        fields = header_re.match(chunk[0]).groupdict()
        start_secs = timecode_tosecs(fields['start'])
        if force_endtime:
            # Backfill the previous entry's missing end with our start.
            if entries and entries[-1]['end'] == '':
                entries[-1]['end'] = start_secs
            end_secs = timecode_tosecs(fields['end']) or ''
        else:
            end_secs = timecode_tosecs(fields['end']) or start_secs
        entries.append({
            'start': start_secs,
            'end': end_secs,
            'body': chunk[1].strip('\n'),
        })
    row = "{e[start]}\t{e[end]}\t{e[body]}\n"
    return "".join(row.format(e=entry) for entry in entries)
if __name__ == "__main__":
    import doctest
    doctest.testmod()
    # Quick manual check: round-trip a sample SRT file.
    # 'with' guarantees the handle is closed even if read() raises.
    with open('/tmp/bla.srt', 'r') as f:
        data = f.read()
    #print(audacity_to_srt(data))
    print(srt_to_audacity(data))
# This file is part of Active Archives.
# Copyright 2006-2012 the Active Archives contributors (see AUTHORS)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Also add information on how to contact you by electronic and paper mail.
import urlparse
import urllib2
import html5lib
import lxml
import lxml.cssselect
import re
import rdfutils
import subprocess
import os.path
import feedparser
from textwrap import dedent
from aacore import RDF_MODEL
class AAFilter(object):
    """Base class for Active Archives pipeline filters.

    A filter receives the previous filter's output dict (``stdin``),
    copies it into ``self.stdout`` with derived local path/URL, and --
    when ``validate()`` accepts the arguments -- immediately executes
    ``run()`` from the constructor.
    """
    def __init__(self, arguments, stdin):
        self.arguments = arguments or ""
        self.parsed_arguments = {}
        self.stdin = stdin
        self.stdout = stdin.copy()
        self.stdout['local_path'] = self.get_next_path()
        self.stdout['local_url'] = self.get_next_url()
        if self.validate():
            # run() populates self.stdout and returns None in practice;
            # the value returned from __init__ is ignored by Python.
            return self.run()

    def validate(self):
        """Parse/validate ``self.arguments``; subclasses override."""
        return True

    @staticmethod
    def uri_to_path(uri):
        """Return the path component of *uri* (file:///tmp/x -> /tmp/x)."""
        return urlparse.urlparse(uri).path

    def _chain_name(self, base):
        """Append this filter's signature to *base*, keeping the extension.

        Shared implementation for get_next_path/get_next_url, which used
        to duplicate this logic line for line.
        """
        extension = os.path.splitext(base)[1]
        return "%s|%s:%s%s" % (base, self.__class__.__name__,
                               self.arguments, extension)

    def get_next_path(self):
        """Derived cache path for this filter's output file."""
        return self._chain_name(self.stdin['local_path'])

    def get_next_url(self):
        """Derived cache URL for this filter's output file."""
        return self._chain_name(self.stdin['local_url'])
class AAFilterEmbed(AAFilter):
    """Renders the resource as inline HTML: <img>, HTML5 <video>/<audio>,
    an <iframe>, or the first entries of an RSS/Atom feed.

    The embed style is taken from the filter argument when given,
    otherwise it is guessed from RDF metadata about the original URL.
    """
    name = "embed"

    def run(self):
        embed_style = None
        if self.arguments:
            # An explicit style was requested, e.g. ||embed:img
            embed_style = self.arguments
        else:
            # No style given: query the RDF store for content-type and
            # codec hints about the original URL.
            # NOTE(review): the doubled braces ("{{", "}}") look like
            # leftovers from str.format(); with %-interpolation they stay
            # literal in the query text -- confirm the store accepts this.
            q = dedent("""\
            PREFIX dc:<http://purl.org/dc/elements/1.1/>
            PREFIX aa:<http://activearchives.org/terms/>
            PREFIX http:<http://www.w3.org/Protocols/rfc2616/>
            PREFIX media:<http://search.yahoo.com/mrss/>
            SELECT ?ctype ?format ?audiocodec ?videocodec
            WHERE {{
            OPTIONAL {{ <%(URL)s> http:content_type ?ctype . }}
            OPTIONAL {{ <%(URL)s> dc:format ?format . }}
            OPTIONAL {{ <%(URL)s> media:audio_codec ?audiocodec . }}
            OPTIONAL {{ <%(URL)s> media:video_codec ?videocodec . }}
            }}""".strip() % {'URL': self.stdin['original_url']})
            b = {}
            # Only the first result row is used.
            for row in rdfutils.query(q, RDF_MODEL):
                for name in row:
                    b[name] = rdfutils.rdfnode(row.get(name))
                break
            # Map content type / codecs to an embed style.
            # TODO: move to templates
            if b.get('ctype') in ("image/jpeg", "image/png", "image/gif"):
                embed_style = "img"
            elif b.get('ctype') in ("video/ogg", "video/webm") or (b.get('videocodec') in ("theora", "vp8")):
                embed_style = "html5video"
            elif b.get('ctype') in ("audio/ogg", ) or (b.get('audiocodec') == "vorbis" and (not b.get('videocodec'))):
                embed_style = "html5audio"
            elif b.get('ctype') in ("text/html", ):
                embed_style = "iframe"
            elif b.get('ctype') in ("application/rss+xml", "text/xml", "application/atom+xml"):
                embed_style = "feed"
            else:
                embed_style = None
        # Render the chosen style into self.stdout['output'].
        # TODO: move to templates
        if embed_style == "img":
            self.stdout['output'] = '<img src="%s" />' % self.stdin['original_url']
        elif embed_style == "html5video":
            #self.stdout['output'] = '<video class="player" controls src="%s" />' % self.stdin['local_url']
            # Temporary fix: the local server serves ogg with the wrong mimetype
            self.stdout['output'] = '<video class="player" controls src="%s" />' % self.stdin['original_url']
        elif embed_style == "html5audio":
            #self.stdout['output'] = '<audio class="player" controls src="%s" />' % self.stdin['local_url']
            # Temporary fix: the local server serves ogg with the wrong mimetype
            self.stdout['output'] = '<audio class="player" controls src="%s" />' % self.stdin['original_url']
        elif embed_style == "iframe":
            self.stdout['output'] = '<iframe src="%s"></iframe>' % self.stdin['local_url']
        elif embed_style == "feed":
            # Render the first four feed entries as title + summary blocks.
            feed = feedparser.parse(self.stdin['local_url'])
            self.stdout['output'] = u''
            for entry in feed['entries'][:4]:
                self.stdout['output'] += u'<div>'
                self.stdout['output'] += u'<h3><a href="%s">%s</a></h3>' % (entry.link, entry.title)
                self.stdout['output'] += u'<div>'
                self.stdout['output'] += entry.summary
                self.stdout['output'] += u'</div>'
                self.stdout['output'] += u'</div>'
        else:
            self.stdout['output'] = "<p>Unable to detect embed type</p>"
class AAFilterXPath(AAFilter):
    """ Takes a url as input value and an xpath as argument.
    Returns a collection of html elements
    usage:
    {{ "http://fr.wikipedia.org/wiki/Antonio_Ferrara"|xpath:"//h2" }}
    """
    name = "xpath"

    def absolutize_refs(self, baseurl, lxmlnode):
        # Rewrite every src attribute so it is absolute w.r.t. baseurl.
        select_with_src = lxml.cssselect.CSSSelector("*[src]")
        for element in select_with_src(lxmlnode):
            absolute = urlparse.urljoin(baseurl, element.get("src"))
            element.set('src', absolute)
        return lxmlnode

    def run(self):
        url = self.stdin['original_url']
        # Some sites refuse the default urllib2 UA; pretend to be Firefox.
        request = urllib2.Request(url)
        request.add_header("User-Agent", "Mozilla/5.0 (X11; U; Linux x86_64; fr; rv:1.9.1.5) Gecko/20091109 Ubuntu/9.10 (karmic) Firefox/3.5.5")
        response = urllib2.urlopen(request)
        builder = html5lib.treebuilders.getTreeBuilder("lxml")
        parser = html5lib.HTMLParser(tree=builder, namespaceHTMLElements=False)
        page = parser.parse(response)
        matches = page.xpath(self.arguments)
        if matches:
            rendered = [
                lxml.etree.tostring(self.absolutize_refs(url, node))
                for node in matches
            ]
            self.stdout['output'] = "\n".join(rendered)
        else:
            self.stdout['output'] = "<p>Your query for %s did not return any elements</p>" % self.arguments
class AAFilterBW(AAFilter):
    """Converts an image to grayscale with ImageMagick's ``convert``."""
    name = "bw"

    def run(self):
        # Only convert when the cached output file does not exist yet.
        if not os.path.exists(self.stdout['local_path']):
            # Pass argv as a list: the old cmd.split(" ") broke paths
            # containing spaces.
            cmd = ['convert', '-colorspace', 'gray',
                   self.stdin['local_path'], self.stdout['local_path']]
            p1 = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                  stdin=subprocess.PIPE)
            (stdout_data, stderr_data) = p1.communicate()
        self.stdout['output'] = "Conversion successful. Use the embed filter to display your ressource in the page"
class AAFilterRotate(AAFilter):
    """Rotates an image by N degrees, e.g. ||rotate:90"""
    name = "rotate"

    def validate(self):
        degreespat = re.compile(r"(?P<degrees>\d+)", re.I)
        match = degreespat.match(self.arguments)
        if match:
            self.parsed_arguments['degrees'] = match.groupdict()['degrees']
        # Previously this returned True unconditionally, so run() crashed
        # with a KeyError when the argument did not parse.
        return match is not None

    def run(self):
        # Only convert when the cached output file does not exist yet.
        if not os.path.exists(self.stdout['local_path']):
            # Argv list instead of cmd.split(" "): paths with spaces survive.
            cmd = ['convert', '-rotate', self.parsed_arguments['degrees'],
                   self.stdin['local_path'], self.stdout['local_path']]
            p1 = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                  stdin=subprocess.PIPE)
            (stdout_data, stderr_data) = p1.communicate()
        self.stdout['output'] = "Conversion successful. Use the embed filter to display your ressource in the page"
class AAFilterResize(AAFilter):
    """Resizes an image to a given width, e.g. ||resize:200px"""
    name = "resize"

    def validate(self):
        sizepat = re.compile(r"(?P<width>\d+)px", re.I)
        match = sizepat.match(self.arguments)
        if match:
            self.parsed_arguments['width'] = match.groupdict()['width']
        # Previously this returned True unconditionally, so run() crashed
        # with a KeyError when the argument did not parse.
        return match is not None

    def run(self):
        # Only convert when the cached output file does not exist yet.
        if not os.path.exists(self.stdout['local_path']):
            # Argv list instead of cmd.split(" "): paths with spaces survive.
            cmd = ['convert', '-resize', self.parsed_arguments['width'],
                   self.stdin['local_path'], self.stdout['local_path']]
            p1 = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                  stdin=subprocess.PIPE)
            (stdout_data, stderr_data) = p1.communicate()
        self.stdout['output'] = "Conversion successful. Use the embed filter to display your ressource in the page"
class AAFilterCrop(AAFilter):
    """
    [[ http://example.org/image.jpg||crop:40x30+10+10 ]]
    """
    name = "crop"

    def validate(self):
        # NOTE(review): ImageMagick geometry is WxH+X+Y; the groups named
        # 'top'/'left' are really the X and Y offsets -- confirm intent.
        sizepat = re.compile(r"(?P<width>\d+)x(?P<height>\d+)\+(?P<top>\d+)\+(?P<left>\d+)", re.I)
        match = sizepat.match(self.arguments)
        if match:
            self.parsed_arguments['width'] = match.groupdict()['width']
            self.parsed_arguments['height'] = match.groupdict()['height']
            self.parsed_arguments['top'] = match.groupdict()['top']
            self.parsed_arguments['left'] = match.groupdict()['left']
        # Previously this returned True unconditionally, so run() crashed
        # with a KeyError when the argument did not parse.
        return match is not None

    def run(self):
        # Only convert when the cached output file does not exist yet.
        if not os.path.exists(self.stdout['local_path']):
            geometry = '%sx%s+%s+%s' % (self.parsed_arguments['width'],
                                        self.parsed_arguments['height'],
                                        self.parsed_arguments['top'],
                                        self.parsed_arguments['left'])
            # Argv list instead of cmd.split(" "): paths with spaces survive.
            cmd = ['convert', '-crop', geometry,
                   self.stdin['local_path'], self.stdout['local_path']]
            p1 = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                  stdin=subprocess.PIPE)
            (stdout_data, stderr_data) = p1.communicate()
        self.stdout['output'] = "Conversion successful. Use the embed filter to display your ressource in the page"
class AAFilterZoomable(AAFilter):
    """
    [[ embed::http://example.org/image-small.jpg||zoomable:200px ]]
    """
    name = "zoomable"

    def validate(self):
        # TODO: verify if stdin is a url
        sizepat = re.compile(r"(?P<width>\d+)px", re.I)
        match = sizepat.match(self.arguments)
        if match:
            self.parsed_arguments['width'] = match.groupdict()['width']
        # Previously this returned True unconditionally, so run() crashed
        # with a KeyError when the argument did not parse.
        return match is not None

    def run(self):
        # Build the resized preview only when the cache file is missing.
        if not os.path.exists(self.stdout['local_path']):
            # Argv list instead of cmd.split(" "): paths with spaces survive.
            cmd = ['convert', '-resize', self.parsed_arguments['width'],
                   self.stdin['local_path'], self.stdout['local_path']]
            p1 = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                  stdin=subprocess.PIPE)
            (stdout_data, stderr_data) = p1.communicate()
        # Preview links to the full image, pan/zoomed by jquery.panzoom.
        self.stdout['output'] = """<div class="panzoom"><a href="%s" rel="zoomable"><img src="%s"></a></div>""" % (self.stdin['local_url'], self.stdout['local_url'])
        self.stdout['extra_js'].append("http://andrew.hedges.name/experiments/pan-zoom/jquery.panzoom.min.js")
        self.stdout['script'] += """$.panzoom();"""
class AAFilterFigure(AAFilter):
    """
    [[ embed::http://example.org/image-small.jpg||embed:img|figure:my caption ]]
    """
    name = "figure"

    def run(self):
        # Wrap the upstream output in a <figure>; the argument is the caption.
        content = self.stdin['output']
        caption = self.arguments
        self.stdout['output'] = "<figure>%s<figcaption>%s</figcaption></figure>" % (content, caption)
class AAFilterLightBox(AAFilter):
    """
    [[ embed::http://example.org/image-small.jpg||lightbox:200px ]]
    """
    name = "lightbox"

    def validate(self):
        # TODO: verify if stdin is a url
        sizepat = re.compile(r"(?P<width>\d+)px", re.I)
        match = sizepat.match(self.arguments)
        if match:
            self.parsed_arguments['width'] = match.groupdict()['width']
        # Previously this returned True unconditionally, so run() crashed
        # with a KeyError when the argument did not parse.
        return match is not None

    def run(self):
        # Build the resized preview only when the cache file is missing.
        if not os.path.exists(self.stdout['local_path']):
            # Argv list instead of cmd.split(" "): paths with spaces survive.
            cmd = ['convert', '-resize', self.parsed_arguments['width'],
                   self.stdin['local_path'], self.stdout['local_path']]
            p1 = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                  stdin=subprocess.PIPE)
            (stdout_data, stderr_data) = p1.communicate()
        # Thumbnail links to the full image, displayed via jquery.lightbox.
        self.stdout['output'] = """<a href="%s" rel="lightbox"><img src="%s"></a>""" % (self.stdin['local_url'], self.stdout['local_url'])
        self.stdout['extra_css'].append("http://leandrovieira.com/projects/jquery/lightbox/css/jquery.lightbox-0.5.css")
        self.stdout['extra_js'].append("http://leandrovieira.com/projects/jquery/lightbox/js/jquery.lightbox-0.5.pack.js")
        self.stdout['script'] += """$('a[rel="lightbox"]').lightBox();"""
if __name__ == '__main__':
    # Demo pipeline: map filter names to classes, then run "bw|embed".
    filters = {}
    for filter_ in AAFilter.__subclasses__():
        filters[filter_.name] = filter_
    source_url = "file:///tmp/gourfinck_numer_04-S-Rcd.jpg"
    local_path = AAFilter.uri_to_path(source_url)
    # Filters index into their stdin (local_path/local_url/original_url),
    # so seed the pipeline with a dict rather than a bare path string.
    stdin = {'local_path': local_path,
             'local_url': local_path,
             'original_url': source_url}
    pipeline = "bw|embed"
    for command in [x.strip() for x in pipeline.split("|")]:
        if ":" in command:
            (filter_, arguments) = command.split(":", 1)
            # The old code discarded the strip() results (no-op).
            filter_ = filter_.strip()
        else:
            (filter_, arguments) = (command.strip(), None)
        try:
            # AAFilter.__init__ already executes run(); the filter's
            # product is its stdout dict, which feeds the next filter.
            # (Calling .run() again, as before, re-ran the filter and
            # propagated None.)
            stdin = filters[filter_](arguments, stdin).stdout
        except KeyError:
            stdin = """The "%s" filter doesn't exist""" % filter_
            break
    print(stdin)
......@@ -8,17 +8,9 @@ class PageEditForm(forms.Form):
content = forms.CharField(
widget=forms.Textarea(attrs={'rows': '12', 'wrap': 'off'}),
label="",
required=False,
required=True,
)
message = forms.CharField(
label="Summary",
required=False,
required=True,
)
is_minor = forms.BooleanField(
label="This is a minor Edit",
required=False,
)
class AnnotationImportForm(forms.Form):
    # Single uploaded annotation file to import.
    file = forms.FileField()
import codecs
import glob
import os
from django.core.management.base import BaseCommand, CommandError
from aawiki.models import Page
from aawiki import settings
class Command(BaseCommand):
    args = ''
    help = 'Synchronizes the database with the git repository.'

    def handle(self, *args, **options):
        """Import every regular file in GIT_DIR as a wiki Page."""
        for filename in glob.glob(os.path.join(settings.GIT_DIR, '*')):
            if not os.path.isdir(filename):
                basename = os.path.basename(filename)
                # get_or_create returns (object, created); the old local
                # name 'did_exist' said the opposite of what it held.
                (page, created) = Page.objects.get_or_create(name=basename)
                # codecs.open decodes utf-8; 'with' guarantees the handle
                # is closed even if read() raises.
                with codecs.open(filename, 'r', 'utf-8') as f:
                    page.content = f.read()
                    print("importing %s" % page.name)
                page.save()
from optparse import make_option
from django.core.management.base import BaseCommand, CommandError
from rdfutils import *
from aawiki.utils import (full_site_url, pagename_for_url)
from aawiki.models import Page
from aacore import RDF_MODEL
class Command(BaseCommand):
args = ''
help = ''
# option_list = BaseCommand.option_list + (
# make_option('--all',
# action='store_true',
# dest='all',
# default=False,
# help='Reindex all'),
# )
def handle(self, *args, **options):
samplepageurl = full_site_url(Page.objects.all()[0].get_absolute_url())
samplepageurl = samplepageurl.rstrip("/")
basepageurl = samplepageurl[:samplepageurl.rindex('/')] + "/"
pagenames = {}
Page.objects.all()
for s in RDF_MODEL:
obj = s.object
if obj.is_resource():
url = unicode(obj.uri)
if url.startswith(basepageurl):
pagenames[pagename_for_url(url)] = True
for name in pagenames:
(page, created) = Page.objects.get_or_create(name=name)
if created:
print "CREATED", name
## DROP ORPHAN PAGES
for page in Page.objects.all():
if page.name not in pagenames:
if page.content.strip():
hastext = True
else:
hastext = False
print "ORPHAN PAGE", page.name, hastext
if not hastext:
page.delete()
# keys = pages.keys()
# keys.sort()
# for name in keys:
# print name
#! /usr/bin/env python2
"""
Active Archives Markdown init function
======================================
It gathers the various AA Extensions for Python-Markdown