# objects.py

from os import listdir
from os.path import isfile, join
import codecs
import re
import urllib 

# Plain-text export URLs of the Etherpad pads used as sources. Every URL has
# the same shape and differs only in the pad name, so the list is built from
# the names.
pads = [
	'http://10.10.161.238/ether/p/' + slug + '/export/txt'
	for slug in (
		'amazon',
		'polyhistorm',
		'etymologiae',
		'cyclopaedia',
		'encyclopedia',
		'great-inventions',
		'taric',
	)
]

# Download every pad once. The two lists are parallel: files[k] holds the
# lines of the pad whose short name is padnames[k].
padnames = []
files = []
for pad in pads:
	response = urllib.urlopen(pad)
	try:
		# Decode at the I/O boundary: the response yields byte strings, and
		# the page is later written through a UTF-8 codecs writer, which
		# fails on non-ASCII byte strings.
		# NOTE(review): assumes the pads are exported as UTF-8 -- confirm
		# against the Etherpad server.
		text = [raw.decode('utf-8') for raw in response.readlines()]
	finally:
		# Release the connection even when reading or decoding fails.
		response.close()
	files.append(text)

	# The short pad name is the URL with its fixed prefix and suffix removed.
	padname = pad.replace('http://10.10.161.238/ether/p/','')
	padname = padname.replace('/export/txt','')
	padnames.append(padname)

# Search configuration.
#   obj     -- name of the object being looked up
#   words   -- substrings that select a line and get highlighted in it
#   exclude -- substrings that reject a line even when a search word matches
obj = 'coffin'
words = 'coffin bier'.split()
exclude = list()

# Joined forms of the lists above: dash-separated and space-separated.
strwords = '-'.join(words)
strwordsnice, strexclude = (' '.join(group) for group in (words, exclude))

# Render every pad line that mentions a search word into the story page.

# Patterns are compiled once here instead of once per line.
indent_run = re.compile(r"\s{8}")
leading_tabs = re.compile(r"^\t*")
# A single alternation, longest word first, so that highlighting happens in
# one pass: a word can never match inside markup inserted for another word,
# and a longer search word is not split by a shorter one it contains.
highlight = re.compile(
	'|'.join(re.escape(word) for word in sorted(words, key=len, reverse=True))
)

with codecs.open('../../stories/object-polyhistor.html','w+', 'utf-8') as output:
	output.write('<small class="info">highlighted words: '+strwordsnice+'</small><br>\n')
	output.write('<small class="info">excluded words: '+strexclude+'</small>\n\n')
	output.write('<div id="main">\n')

	levelnumber = '1'

	def mark(match):
		# Wrap one matched search word in its highlight span.
		return '<span class="high'+levelnumber+'">'+match.group(0)+'</span>'

	for source_index, lines in enumerate(files):
		output.write('<div class="source"><small>')
		output.write(padnames[source_index])
		output.write('</small><br>')

		# NOTE(review): pad text is inserted into the page without HTML
		# escaping; escape it here if the pads can contain markup characters.
		for linenumber, line in enumerate(lines):
			# Lines without a search word, or with an excluded word, are skipped.
			if not any(word in line for word in words):
				continue
			if any(word in line for word in exclude):
				continue

			line = line.replace('\n','<br>')

			# Every run of eight whitespace characters becomes one tab; the
			# number of leading tabs plus one is the indentation class
			# (no leading tab -> "t1").
			line = indent_run.sub("\t", line)
			strtab = str(len(leading_tabs.match(line).group(0)) + 1)

			line = highlight.sub(mark, line)
			output.write('\n\n<div class="line t'+strtab+'"><small class="linenumber">'+str(linenumber)+'</small>\n'+line+'\n</div>')

		# Close this pad's <div class="source"> so the sources are siblings
		# inside #main instead of nesting inside one another.
		output.write('</div>\n')

	output.write('</div>\n\n')

# Parenthesised form prints the same text under Python 2 and is valid Python 3.
print('*output written*')