Commits (2)
@@ -131,7 +131,7 @@ class SitePage (object):
         self.page = self.wiki.pages.get(pagename)
         return self
 
-    def process_feed_item (self, item, source, cachedir, download=True, phpuploader=None, user=None):
+    def process_feed_item (self, item, source, cachedir, download=True, phpuploader=None, user=None, pretend=False):
         # print ("-"*10, file=sys.stderr)
         # sitepage = SitePage(wiki, item.link)
         # page, data = get_site_page_and_data(wiki, item.link)
@@ -146,7 +146,7 @@ class SitePage (object):
         # print ("published:", ts.strftime("%Y/%m/%d %H:%M:%S"), file=sys.stderr)
         # print ("title:", item.title, file=sys.stderr)
         # print ("description:", item.description, file=sys.stderr)
         print ("Description src:\n{0}\n".format(item.description), file=sys.stderr)
         # print (item.keys())
         # print ("author:", item.author, file=sys.stderr)
@@ -197,14 +197,13 @@ class SitePage (object):
         if description_text:
             page_contents += "\n\n== Description ==\n{0}\n".format(description_text)
 
-        print ("Saving PAGE...", file=sys.stderr)
+        print ("Saving PAGE...\n", file=sys.stderr)
         print (page_contents, file=sys.stderr)
         print (file=sys.stderr)
-        self.page.save(page_contents)
+        if not pretend:
+            self.page.save(page_contents)
 
-class FluxPage (object):
+class SiteWebPage (object):
 
     @staticmethod
     def parse_dt (val):
         try:
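The new pretend parameter follows the usual dry-run pattern: all the work up to the write still happens (the assembled page text is still printed to stderr), and only the destructive save is skipped. Distilled to a standalone sketch (function name hypothetical):

    import sys

    # Dry-run guard as used in this commit: do the work, show the result,
    # and skip only the write when pretend is set.
    def save_page(page, contents, pretend=False):
        print(contents, file=sys.stderr)
        if not pretend:
            page.save(contents)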
@@ -215,7 +214,7 @@ class FluxPage (object):
     @classmethod
     def get_all (cls, wiki):
         result = wiki.get("cargoquery",
-            tables="Flux",
+            tables="Site_web",
             fields="_pageName=pagename,Feed_url=feed_url,Site_url=site_url,Last_updated=last_updated",
             order_by="pagename")
         # print (json.dumps(result, indent=2))
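For context: Cargo's action=cargoquery API nests each result row under a "title" key, and aliased fields (e.g. _pageName=pagename) come back under their alias. A minimal sketch of consuming the renamed Site_web table with mwclient (host hypothetical):

    import mwclient

    site = mwclient.Site("wiki.example.org")  # hypothetical host
    result = site.get("cargoquery",
        tables="Site_web",
        fields="_pageName=pagename,Feed_url=feed_url,Site_url=site_url,Last_updated=last_updated",
        order_by="pagename")
    for row in result["cargoquery"]:
        fields = row["title"]  # Cargo nests each result row under "title"
        print(fields["pagename"], fields["feed_url"])

Unaliased fields come back with underscores replaced by spaces, which is why init() below reads result['Last updated'].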
@@ -236,7 +235,7 @@ class FluxPage (object):
 
     def init (self):
         result = self.wiki.get("cargoquery",
-            tables="Flux",
+            tables="Site_web",
             fields="_pageID=pageID,_pageTitle=pageTitle,_pageName=pageName,_pageNamespace=pageNamespace,Feed_url,Site_url,Last_updated",
             where="_pageName=\"{0}\"".format(self.pagename),
             limit=1)
@@ -250,7 +249,7 @@ class FluxPage (object):
         self.last_updated = self.parse_dt(result['Last updated'])
 
     def template_text (self):
-        return """{{{{Flux
+        return """{{{{Site web
 |Feed_url={feed_url}
 |Site_url={site_url}
 |Last_updated={updated}
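Aside on the quadruple braces: template_text() runs this string through str.format(), which treats {{ as an escaped literal {, so {{{{ is what produces the {{ of MediaWiki template syntax while {feed_url} stays a substitution slot. A self-contained illustration (values hypothetical):

    # str.format() turns "{{{{" into a literal "{{" (and "}}}}" into "}}"),
    # while {feed_url} remains a substitution slot.
    text = "{{{{Site web\n|Feed_url={feed_url}\n}}}}".format(feed_url="http://example.org/rss")
    print(text)
    # {{Site web
    # |Feed_url=http://example.org/rss
    # }}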
@@ -272,7 +271,7 @@ class FluxPage (object):
             print ("WARNING: Template not found. Skipping save", file=sys.stderr)
             # page.save(self.template_text())
 
-    def process_feed(self, cachedir, limit=None, phpuploader=None, user=None):
+    def process_feed(self, cachedir, limit=None, phpuploader=None, user=None, force=False, pretend=False):
         feed = feedparser.parse(self.feed_url)
         # ensure in reverse chronological order (probably redundant but important for the processing)
         feed.entries.sort(key=lambda x: x.published_parsed, reverse=True)
@@ -282,8 +281,8 @@ class FluxPage (object):
         all_entries = reversed(feed.entries)
         skipped = 0
-        # Filter list when last updated is present
-        if self.last_updated:
+        # Filter list when last updated is present (and not using force)
+        if not force and self.last_updated:
             use_all_entries = []
             # print ("processing feed, last_updated {0}".format(self.last_updated.strftime(DATETIME_STRF)), file=sys.stderr)
             for item in all_entries:
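The filtering that --force bypasses is a date cutoff against Last_updated; the loop body is elided in this hunk, but the idea reduces to comparing each entry's published_parsed to the stored datetime. A minimal sketch (ts2dt is the script's helper; this implementation of it is an assumption):

    import time
    from datetime import datetime

    def ts2dt(ts):
        # assumed: convert feedparser's time.struct_time to a datetime
        return datetime.fromtimestamp(time.mktime(ts))

    def entries_since(entries, last_updated):
        # keep only entries strictly newer than the last recorded update
        return [e for e in entries if ts2dt(e.published_parsed) > last_updated]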
@@ -302,10 +301,11 @@ class FluxPage (object):
         for item in all_entries:
             item_dt = ts2dt(item.published_parsed)
             sitepage = SitePage(self.wiki, item.link)
-            sitepage.process_feed_item(item, source=self.pagename, cachedir=cachedir, phpuploader=phpuploader, user=user)
+            sitepage.process_feed_item(item, source=self.pagename, cachedir=cachedir, phpuploader=phpuploader, user=user, pretend=pretend)
             if self.last_updated == None or item_dt > self.last_updated:
                 self.last_updated = item_dt
-                self.save()
+                if not pretend:
+                    self.save()
             count += 1
             if limit and count>=limit:
                 break
@@ -317,6 +317,8 @@ if __name__ == "__main__":
     # ap.add_argument("--feed", default="http://ergoteradio.tumblr.com/rss")
     # ap.add_argument("--download", action="store_true", default=False)
     ap.add_argument("--limit", type=int, default=None)
+    ap.add_argument("--force", default=False, action="store_true")
+    ap.add_argument("--pretend", default=False, action="store_true")
     ap.add_argument("--cachedir", default="cache", help="directory where files are temporarily downloaded")
     ap.add_argument("--phpimportimages", default=None, help="optional: path to mw/maintenance/importImages.php. Default is to upload via the API")
@@ -339,16 +341,16 @@ if __name__ == "__main__":
     # if page:
     #     print (page.name)
 
     if args.source:
-        flux = FluxPage(site, args.source)
-        flux.process_feed(args.cachedir, limit=args.limit, phpuploader=args.phpimportimages)
+        flux = SiteWebPage(site, args.source)
+        flux.process_feed(args.cachedir, limit=args.limit, phpuploader=args.phpimportimages, user=args.user, force=args.force, pretend=args.pretend)
     else:
         print ("Processing all feeds", file=sys.stderr)
-        for i, f in enumerate(FluxPage.get_all(site)):
+        for i, f in enumerate(SiteWebPage.get_all(site)):
             print ("[{0}] {1}".format(i, f.pagename), file=sys.stderr)
             print ("feed_url: {0}".format(f.feed_url), file=sys.stderr)
             if f.last_updated:
                 print ("last_updated: {0}".format(f.last_updated), file=sys.stderr)
             else:
                 print ("last_updated: ---", file=sys.stderr)
-            f.process_feed(args.cachedir, limit=args.limit, phpuploader=args.phpimportimages, user=args.user)
+            f.process_feed(args.cachedir, limit=args.limit, phpuploader=args.phpimportimages, user=args.user, force=args.force, pretend=args.pretend)
             print (file=sys.stderr)
@@ -111,7 +111,7 @@ def upload_file_to_wiki (wiki, path, filename=None, description="", phpuploader=
         sh.update(data)
     im = None
     for im in wiki.allimages(sha1=sh.hexdigest()):
-        print ("[upload_file_to_wiki]: File already uploaded to wiki.", file=sys.stderr)
+        print ("[upload_file_to_wiki]: File <{0}> already uploaded to wiki.".format(os.path.basename(path)), file=sys.stderr)
         return im
 
     if filename == None:
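The surrounding code hashes the local file and asks the wiki for any image with the same SHA-1 before uploading; wiki.allimages(sha1=...) is mwclient's wrapper for list=allimages. Distilled into a standalone sketch (function name hypothetical):

    import hashlib

    def find_existing_upload(wiki, path):
        # hash the local file, then ask the wiki for an image with the
        # same SHA-1; a hit means the content is already uploaded
        sh = hashlib.sha1()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                sh.update(chunk)
        for im in wiki.allimages(sha1=sh.hexdigest()):
            return im
        return None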