Commit 8da3d1ee by murtaugh

feeder

parent 85184589
......@@ -68,15 +68,24 @@ EXT["image/jpg"] = "jpg"
IMAGE_EXT = set(("png", "jpg", "gif"))
# def get_url_filename (url):
# return os.path.basename(urlparse(url).path)
def ensure_extension (filename, ext):
    """Return ``filename`` with its final extension replaced by ``ext``.

    The last extension of ``filename`` (as split by ``os.path.splitext``) is
    dropped and ``"." + ext`` is appended; a name without an extension simply
    gains one. ``ext`` may be given with or without a leading dot, so
    ``"jpg"`` and ``".jpg"`` produce the same result.
    """
    # Only the stem is needed; the previous extension is discarded.
    base = os.path.splitext(filename)[0]
    # Strip any leading dot so the result never contains a doubled "..".
    return base + "." + ext.lstrip(".")
def wget (url, cachedir, blocksize=4*1000):
# if type(url) == unicode:
# url = url.encode("utf-8")
fin = urlopen(url)
ct = fin.info().get("content-type")
if ct in EXT:
filename = ensure_extension(os.path.basename(urlparse(url).path), EXT[ct])
count = 0
md5 = hashlib.md5()
path = os.path.join(cachedir, "tmp."+EXT[ct])
# path = os.path.join(cachedir, "tmp."+EXT[ct])
path = os.path.join(cachedir, filename)
with open(path, "wb") as fout:
while True:
data = fin.read(blocksize)
......@@ -90,8 +99,6 @@ def wget (url, cachedir, blocksize=4*1000):
print ("Unknown content-type", ct, file=sys.stderr)
return None, None
# def get_url_filename (url):
# return os.path.basename(urlparse(url).path)
# def safename (n):
# n = re.sub(u"[ ]", u"_", n)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment