Commit fb881457 authored by colm's avatar colm

this closes #9, not in the most elegant fashion (the list is still built by...

This closes #9, though not in the most elegant fashion: the pattern list is still built by hand, but at least the regex substitution is now a loop. For now it will do fine.
parent 382c7e5d
......@@ -6,11 +6,10 @@ import re
# Flask application object for this module.
app = Flask(__name__)
# Current working directory at the moment this module is loaded.
thisdir = os.getcwd()
# Individual regexes for uncertainty markers. Each one puts the text to be
# highlighted in capture group 1.
pattern0 = r'(probabl\w*)'  # "probabl" followed by any word characters
pattern1 = r'(possibl\w*)'  # "possibl" followed by any word characters
pattern2 = r'(peut)'
pattern3 = r'\s(être)'  # whitespace then "être"; only "être" is captured
pattern4 = r'(peut\-être)'
# Regexes for uncertainty markers; each one captures its match as group 1 so
# the replacement template can wrap it in a highlighting <span>.
#
# Every entry must be followed by a comma: adjacent string literals are
# silently concatenated by Python, which would merge two words into one
# pattern that matches neither of them on its own.
#
# r'(probably)' is intentionally not listed: r'(probabl\w*)' already matches
# it, and substituting both in sequence would wrap the word in nested spans.
patternList = [
    r'(probabl\w*)',
    r'(possibl\w*)',
    r'(peut\-être)',
    r'(bijna)',
    r'(could)',
    r'(effectivement)',
    r'(éventuellement)',
    r'(jamais)',
    r'(may)',
    r'(might)',
    r'(misschien)',
    r'(mogelijk)',
    r'(must)',
    r'(ongeveer)',
    r'(parfois)',
    r'(perhaps)',
    r'(presque)',
    r'(sometimes)',
    r'(souvent)',
    r'(waarschijnlijk)',
    r'(zeker)',
]
# Replacement template: wraps capture group 1 in a span with the CSS class
# "uncertain_match".
replace0 = r'<span class="uncertain_match">\1</span>'
# Same as replace0, but inserts a single space before the captured text
# inside the span.
replace1 = r'<span class="uncertain_match"> \1</span>'
# Paths of all CSV files in static/word_patterns/ (resolved relative to the
# current working directory).
uncertainlist = glob.glob('static/word_patterns/*.csv')
......@@ -27,15 +26,14 @@ for file in uncertainlist:
for row in file:
row[1] = row[1].replace(" ", "-")
row[1] = row[1].replace(".", "-")
row[11] = re.sub(pattern4, replace0, row[11])
row[11] = re.sub(pattern0, replace0, row[11])
row[11] = re.sub(pattern1, replace0, row[11])
# row[11] = re.sub(pattern2, replace0, row[11])
# row[11] = re.sub(pattern3, replace1, row[11])
for pattern in patternList:
row[11] = re.sub(pattern, replace0, row[11])
full_counter += 1
full_dict[filename] = currentlist
amounts = []
for word, results in full_dict.items():
word_amount = word, len(results)
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment