"""Build an HTML fragment from the plain-text exports of several etherpads.

Every pad listed in ``pads`` is downloaded, each of its lines is wrapped in a
``<div>`` carrying an indentation class and a line number, and occurrences of
the search words are wrapped in a highlight ``<span>``.  The result is written
to ``OUTPUT_PATH``.

The script runs unchanged on Python 2 and Python 3.
"""
from os import listdir
from os.path import isfile, join
import codecs
import contextlib
import re
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

PAD_URL_PREFIX = 'http://10.10.161.238/ether/p/'
PAD_URL_SUFFIX = '/export/txt'
OUTPUT_PATH = '../../stories/object-polyhistor.html'

pads = [
    'http://10.10.161.238/ether/p/amazon/export/txt',
    'http://10.10.161.238/ether/p/polyhistorm/export/txt',
    'http://10.10.161.238/ether/p/etymologiae/export/txt',
    'http://10.10.161.238/ether/p/cyclopaedia/export/txt',
    'http://10.10.161.238/ether/p/encyclopedia/export/txt',
    'http://10.10.161.238/ether/p/great-inventions/export/txt',
    'http://10.10.161.238/ether/p/taric/export/txt',
]

obj = 'coffin'  # not used by this script
words = ['coffin', 'bier']
exclude = []

strwords = '-'.join(words)  # not used by this script
strwordsnice = ' '.join(words)
strexclude = ' '.join(exclude)

# Compiled once; both are applied to every line of every pad.
EIGHT_WHITESPACE = re.compile(r"\s{8}")
LEADING_TABS = re.compile(r"\t*")


def pad_name(url):
    """Return the pad name embedded in an export *url*."""
    return url.replace(PAD_URL_PREFIX, '').replace(PAD_URL_SUFFIX, '')


def fetch_lines(url):
    """Download *url* and return its lines as text, line endings kept.

    The response is closed as soon as it has been read.  Lines are decoded
    from UTF-8 here, once, so that non-ASCII pad content can be written
    through the UTF-8 output writer; undecoded byte strings make that writer
    raise ``UnicodeDecodeError`` on Python 2.
    """
    with contextlib.closing(urlopen(url)) as response:
        return [raw.decode('utf-8') for raw in response.readlines()]


def compile_highlighter(words):
    """Return a regex matching any of *words* literally, or None if empty."""
    if not words:
        return None
    return re.compile('|'.join(re.escape(word) for word in words))


def render_line(number, line, highlighter, exclude, level='1'):
    """Return the HTML ``<div>`` for one pad line.

    number      -- line index shown in the ``linenumber`` element
    line        -- text of the line, possibly ending in a newline
    highlighter -- regex from ``compile_highlighter`` or None
    exclude     -- words that suppress highlighting when present in the line
    level       -- suffix of the ``high<level>`` CSS class

    NOTE(review): the line text is emitted without HTML escaping, exactly as
    before; markup typed into a pad reaches the page unchanged.
    """
    line = line.replace('\n', '<br>')
    # Each run of eight whitespace characters counts as one indent step.
    line = EIGHT_WHITESPACE.sub("\t", line)
    # The pattern also matches the empty string, so match() never fails.
    depth = LEADING_TABS.match(line).end() + 1

    if highlighter is not None and not any(word in line for word in exclude):
        def wrap(match):
            return ('<span class="high' + level + '">'
                    + match.group(0) + '</span>')

        # A single pass over the line: the inserted markup is never
        # rescanned, so a search word such as "span" or "class" cannot
        # corrupt the tags added for another word.
        line = highlighter.sub(wrap, line)

    # Plain concatenation keeps text/bytes promotion safe on Python 2.
    return ('\n\n<div class="line t' + str(depth)
            + '"><small class="linenumber">' + str(number)
            + '</small>\n' + line + '\n</div>')


def main():
    """Fetch every pad and write the combined HTML to ``OUTPUT_PATH``."""
    # Download everything first so that a network failure does not leave a
    # truncated output file behind.
    files = [fetch_lines(pad) for pad in pads]
    padnames = [pad_name(pad) for pad in pads]
    highlighter = compile_highlighter(words)
    levelnumber = '1'

    with codecs.open(OUTPUT_PATH, 'w+', 'utf-8') as output:
        output.write('<small class="info">highlighted words: '
                     + strwordsnice + '</small><br>\n')
        output.write('<small class="info">excluded words: '
                     + strexclude + '</small>\n\n')
        # NOTE(review): this element is never closed by the script; left
        # as-is in case the page that embeds the fragment closes it.
        output.write('<div id="main">\n')

        for padname, lines in zip(padnames, files):
            output.write('<div class="source"><small>')
            output.write(padname)
            output.write('</small><br>')
            for number, line in enumerate(lines):
                output.write(render_line(number, line, highlighter,
                                         exclude, levelnumber))
            output.write('</div>\n\n')

    print('*output written*')


if __name__ == '__main__':
    main()