Commit 69ee7188 by murtaugh

strip all whitespace in descriptions

parent dc786724
Showing with 24 additions and 15 deletions
......@@ -34,8 +34,12 @@ THUMBNAIL_EXTS = set(("png", "jpg", "gif", "pdf", "svg"))
DL_VIMEO_FORMAT = "http-360p"
DATETIME_STRF = "%Y-%m-%d %H:%M:%S"
# Convert `ts` to a datetime.datetime: mktime(ts) produces the timestamp that
# datetime.datetime.fromtimestamp() then turns into a datetime object.
# NOTE(review): `ts` is presumably a time.struct_time and `mktime` presumably
# time.mktime (neither import is visible here) -- confirm.
def ts2dt (ts):
return datetime.datetime.fromtimestamp(mktime(ts))
# def ts2dt (ts):
# return datetime.datetime.fromtimestamp(mktime(ts))
def strip_all_whitespace (text):
    """ Trims a string, normalizing internal runs of (any) whitespace to a single space.

    Every run of whitespace characters (spaces, tabs, newlines, ...) in `text`
    is replaced by one space, then leading and trailing whitespace is removed.
    Returns the normalized string; an empty or all-whitespace input gives "".
    """
    # Raw string: "\s" in a plain string literal is an invalid escape sequence
    # and triggers a DeprecationWarning/SyntaxWarning on recent Python versions.
    return re.sub(r"\s+", " ", text).strip()
def youtube_dl (url, cachedir, format=None):
args = [YOUTUBE_DL]
......@@ -178,7 +182,7 @@ class PageWeb (object):
# print ("author:", item.author, file=sys.stderr)
et = html5lib.parseFragment(item.description, treebuilder="etree", namespaceHTMLElements=False)
description_text = ET.tostring(et, method="text", encoding="unicode").strip()
description_text = strip_all_whitespace(ET.tostring(et, method="text", encoding="unicode"))
# print ("DT", description_text, file=sys.stderr)
# basename = datetime.datetime.now().strftime("%Y-%m-%d_")
now = datetime.datetime.now()
......@@ -197,7 +201,8 @@ class PageWeb (object):
ext = ext[1:].lower()
# wikifilename = basename+h+ext
# print ("Uploading data from {0} to wiki as {1}".format(link, wikifilename), file=sys.stderr)
description = "{0} {1}".format(link, now.strftime("%Y/%m/%d"))
# Repéré le 9 décembre 2014 à http://fr.wikipedia.org/wiki/Experience_de_Milgram
description = "{{{{Provenance web|URL={0}|Date={1}}}}}".format(link, now.strftime("%Y/%m/%d"))
wikifile = upload_file_to_wiki(self.wiki, filename, None, description, phpuploader=phpuploader, user=user)
gallery.append(wikifile.name)
if thumbnail == None and ext in THUMBNAIL_EXTS:
......
import os
from urllib.parse import urlparse
import re
# def page_exists (wiki, name):
# return wiki.pages[name].exists
def pagename_for_url (url):
    """ Builds a page name of the form "hostname/basename" from a URL.

    The port (if any), the query string and the fragment are dropped, and only
    the last segment of the URL path is kept.

    Example:
        "http://ergoteradio.tumblr.com:9001/post/160201362653"
        => "ergoteradio.tumblr.com/160201362653"
    """
    # URL paths and the resulting page name always use "/", so use posixpath
    # rather than os.path, whose separator depends on the platform
    # (a backslash on Windows).
    import posixpath
    p = urlparse(url)
    # netloc may carry a ":port" suffix; keep only what precedes the first colon
    hostname = p.netloc.split(":")[0]
    return posixpath.join(hostname, posixpath.basename(p.path))
def strip_all_whitespace (text):
    """ Trims a string, normalizing internal runs of (any) whitespace to a single space.

    Every run of whitespace characters (spaces, tabs, newlines, ...) in `text`
    is replaced by one space, then leading and trailing whitespace is removed.
    Returns the normalized string; an empty or all-whitespace input gives "".
    """
    # Raw string: "\s" in a plain string literal is an invalid escape sequence
    # and triggers a DeprecationWarning/SyntaxWarning on recent Python versions.
    return re.sub(r"\s+", " ", text).strip()
# Quick manual checks: print the page name derived from two sample URLs
# (one with an explicit port, one with a query string and a fragment).
for _url in (
    "http://ergoteradio.tumblr.com:9001/post/160201362653",
    "http://ergoteradio.tumblr.com/post/160201362653?art=fooo#batatta",
):
    print (pagename_for_url(_url))

# Multi-line sample text; the surrounding pipes make the trimmed edges of the
# normalized result visible in the output.
_sample = """
This is a test.
-- More things here.
* Hey what about that!
"""
_normalized = strip_all_whitespace(_sample)
print ("|{0}|".format(_normalized))
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment