Commit 9c19ce74 by murtaugh

Abstracted get_item_date

parent 458802f7
Showing with 12 additions and 6 deletions
......@@ -108,6 +108,14 @@ class SitePage (object):
hostname = hostname.split(":")[0]
return os.path.join(hostname, os.path.basename(p.path))
@staticmethod
def get_item_date(i):
    """Return the date of feed item ``i`` as a naive ``datetime``.

    The ``updated`` date is preferred; ``published`` is the fallback.
    A date is used only when both the raw key and its ``*_parsed``
    counterpart are present in ``i`` and the parsed value is not None.

    Args:
        i: Mapping-like feed entry supporting ``in`` and ``[]`` lookups.
            NOTE(review): presumably a feedparser entry whose
            ``*_parsed`` values are ``time.struct_time`` -- confirm.

    Returns:
        A ``datetime.datetime`` built from the parsed time tuple, or
        None when the item carries no usable date.
    """
    date_keys = (
        ('updated', 'updated_parsed'),
        ('published', 'published_parsed'),
    )
    for raw_key, parsed_key in date_keys:
        if raw_key in i and parsed_key in i:
            parsed = i[parsed_key]
            # A parsed value of None would make mktime() raise TypeError;
            # skip it so the next candidate date can be tried instead.
            if parsed is not None:
                return datetime.datetime.fromtimestamp(mktime(parsed))
    # Explicit: no usable date was found on this item.
    return None
def __init__ (self, wiki, url):
self.wiki = wiki
self.url = url
......@@ -143,8 +151,7 @@ class SitePage (object):
else:
print ("Creating wiki page \"{0}\"".format(self.page.name), file=sys.stderr)
ts = ts2dt(item.updated_parsed)
# ts = datetime.datetime(*item.published_parsed[:6])
ts = self.get_item_date(item)
# print ("published:", ts.strftime("%Y/%m/%d %H:%M:%S"), file=sys.stderr)
# print ("title:", item.title, file=sys.stderr)
......@@ -277,8 +284,7 @@ class SiteWebPage (object):
def process_feed(self, cachedir, limit=None, phpuploader=None, user=None, force=False, pretend=False):
feed = feedparser.parse(self.feed_url)
# ensure in reverse chronological order (probably redundant but important for the processing)
# feed.entries.sort(key=lambda x: x.published_parsed, reverse=True)
feed.entries.sort(key=lambda x: x.updated_parsed, reverse=True)
feed.entries.sort(key=lambda x: self.get_item_date(x), reverse=True)
# print (feed.entries[0].published_parsed, "to", feed.entries[-1].published_parsed)
count = 0
# Process in CHRONOLOGICAL order... skipping elements that OLDER than / equal to feed's last updated timestamp
......@@ -290,7 +296,7 @@ class SiteWebPage (object):
use_all_entries = []
# print ("processing feed, last_updated {0}".format(self.last_updated.strftime(DATETIME_STRF)), file=sys.stderr)
for item in all_entries:
item_dt = ts2dt(item.updated_parsed)
item_dt = self.get_item_date(item)
if item_dt <= self.last_updated:
# print ("Skipping older item {0}".format(item.title), file=sys.stderr)
skipped += 1
......@@ -303,7 +309,7 @@ class SiteWebPage (object):
print ("No new items since feed last updated", file=sys.stderr)
for item in all_entries:
item_dt = ts2dt(item.updated_parsed)
item_dt = self.get_item_date(item)
sitepage = SitePage(self.wiki, item.link)
sitepage.process_feed_item(item, source=self.pagename, cachedir=cachedir, phpuploader=phpuploader, user=user, pretend=pretend)
if self.last_updated == None or item_dt > self.last_updated:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment