...
 
Commits (2)
from db import connect, find_one_word, find_words
from sonnets import generate_queneau_sonnet
import re
import random
import sys
"""
word CHARACTER(50),
lemma CHARACTER(50),
tag CHARACTER(5),
syllable_count INTEGER,
rhyme_end CHARACTER(5),
nature_score FLOAT,
has_nature_score BOOLEAN
"""
# Database connection opened once at import time and shared by the lookups below.
conn = connect()
def load_queneau_replacement_words():
    """Read the replacement candidates from ``../data/queneau_words.txt``.

    Returns a list with one entry per line of the file, each stripped of
    leading and trailing whitespace.
    """
    with open('../data/queneau_words.txt', 'r') as handle:
        return list(map(str.strip, handle))
def find_replacement(word, should_rhyme=False, nature_score=None):
    """Look up at most ten nouns that could stand in for ``word``.

    ``word`` is first looked up as a noun (tag ``NOM``). Candidates share its
    gender, plural flag and syllable count, and never equal ``word`` itself.

    word: the word to replace.
    should_rhyme: when true, candidates must also have the same ``rhyme_end``
        as ``word``.
    nature_score: optional ``(low, high)`` pair; when given, candidates must
        have a nature score and it must lie between the two bounds.

    Returns a list of candidate words, or ``None`` when ``word`` is not found
    or the search yields nothing.
    """
    word_data = find_one_word(conn, [('tag', 'NOM'), ('word', word)])
    if not word_data:
        return None

    # Same tag, gender, number and syllable count as the original word.
    criteria = [('tag', 'NOM')]
    for column in ('gender', 'plural', 'syllable_count'):
        criteria.append((column, word_data[column]))

    if nature_score:
        low, high = nature_score[0], nature_score[1]
        criteria.append(('nature_score', 'BETWEEN', '{} AND {}'.format(low, high)))
        criteria.append(('has_nature_score', 1))

    if should_rhyme:
        criteria.append(('rhyme_end', word_data['rhyme_end']))

    criteria.append(('word', '!=', word))

    matches = find_words(conn, criteria, limit=10)
    if not matches:
        return None
    return [match['word'] for match in matches[:10]]
# Words that are eligible for replacement, loaded once at import time.
words_to_replace = load_queneau_replacement_words()
def _search_replacements(word, should_rhyme, score):
    """Return replacement candidates for ``word``, relaxing constraints step by step."""
    replacements = None

    # First try the nature selection: widen the accepted score range around
    # ``score`` by 0.25 per attempt until something matches or the window
    # grows past 10.
    window = 0.25
    while not replacements and window <= 10:
        replacements = find_replacement(
            word, should_rhyme,
            nature_score=(score - window, score + window))
        window += .25

    # Nothing within that range: select on syllable count and rhyme
    # preference only.
    if not replacements:
        replacements = find_replacement(word, should_rhyme)

    # Still nothing while a rhyme was requested: drop the rhyme preference.
    if not replacements and should_rhyme:
        replacements = find_replacement(word, should_rhyme=False)

    # Finally give up.
    return replacements or ['NO REPLACEMENTS']


def generate_replaced_sonnet(score):
    """Generate a Queneau sonnet and collect replacements for its candidate words.

    score: nature score around which replacements are searched first.

    Returns a list of lines. Each line is a list of tuples in reading order:
    ``(word,)`` for a word that is kept, ``(word, replacements)`` for a word
    listed in ``words_to_replace``.
    """
    new_sonnet = []

    for line in generate_queneau_sonnet():
        rebuilt = []
        # The line is walked from its end; the rhyme requirement is dropped
        # after the first replaced word or the first token starting with a
        # word character.
        should_rhyme = True

        for word in reversed(line.split(' ')):
            if word in words_to_replace:
                rebuilt.append((word, _search_replacements(word, should_rhyme, score)))
                should_rhyme = False
            else:
                if re.match(r'\w+', word):
                    should_rhyme = False
                rebuilt.append((word,))

        # Restore reading order.
        rebuilt.reverse()
        new_sonnet.append(rebuilt)

    return new_sonnet
# for debug take one line, see how many options there are
# what is the correct pos-tag?
if __name__ == '__main__':
    # The optional first command-line argument is the target nature score.
    if len(sys.argv) > 1:
        score = float(sys.argv[1])
    else:
        score = 0.0

    new_sonnet = generate_replaced_sonnet(score)
    print('A generated sonnet with nature score: {}'.format(score))

    for line in new_sonnet:
        output = []
        for word in line:
            if len(word) > 1:
                # (original, candidates): print one random candidate followed
                # by the original word in parentheses.
                output.append('{replacement} ({old})'.format(old=word[0], replacement=random.choice(word[1])))
            else:
                output.append(word[0])
        print(' '.join(output))
\ No newline at end of file
import sqlite3
import settings
from utils import error, debug
# Column combinations that get an index on the words table. Each index is
# named by joining its column names with underscores.
INDEXES = [
    ('tag', 'word'),
    ('tag', 'gender', 'plural', 'syllable_count'),
    ('tag', 'gender', 'plural', 'syllable_count', 'nature_score'),
    ('tag', 'gender', 'plural', 'syllable_count', 'rhyme_end'),
    ('tag', 'gender', 'plural', 'syllable_count', 'nature_score', 'rhyme_end'),
    ('tag', 'gender', 'plural', 'syllable_count', 'word'),
    ('tag', 'gender', 'plural', 'syllable_count', 'nature_score', 'word'),
    ('tag', 'gender', 'plural', 'syllable_count', 'rhyme_end', 'word'),
    ('tag', 'gender', 'plural', 'syllable_count', 'nature_score', 'rhyme_end', 'word' )
]
def connect():
    """Open the SQLite database located at ``settings.DATABASE``.

    The returned connection yields ``sqlite3.Row`` objects, so columns can be
    read by name.
    """
    connection = sqlite3.connect(settings.DATABASE)
    connection.row_factory = sqlite3.Row
    return connection
def drop_table_and_indexes(conn):
    """Drop every index listed in ``INDEXES`` and the ``words`` table.

    conn: an open sqlite3 connection.

    Returns ``True`` on success. On failure the transaction is rolled back,
    the error is reported and ``False`` is returned.
    """
    cur = conn.cursor()
    try:
        for row in INDEXES:
            cur.execute("DROP INDEX IF EXISTS {}".format('_'.join(row)))
        cur.execute("DROP TABLE IF EXISTS words")
        # Commit like create_table/create_indexes do, so the drops are not
        # left pending inside a transaction that was already open.
        conn.commit()
        return True
    except Exception as e:
        conn.rollback()
        error("Could not drop words table {}".format(e))
        return False
def create_table(conn):
    """Create the ``words`` table if it does not exist yet.

    conn: an open sqlite3 connection.

    Returns ``True`` on success. On failure the transaction is rolled back,
    the error is reported and ``False`` is returned.
    """
    cur = conn.cursor()
    try:
        cur.execute("""CREATE TABLE IF NOT EXISTS words(
word CHARACTER(50),
lemma CHARACTER(50),
tag CHARACTER(5),
gender CHARACTER(1),
plural BOOLEAN,
syllable_count INTEGER,
rhyme_end CHARACTER(5),
nature_score FLOAT,
has_nature_score BOOLEAN
)""")
        conn.commit()
        return True
    except Exception as e:
        conn.rollback()
        # The table created here is "words"; the message used to name a
        # "sentences" table.
        error("Could not create words table {}".format(e))
        return False
def create_indexes(conn):
    """Create one index on ``words`` for every column tuple in ``INDEXES``.

    The index name is the column names joined with underscores.

    Returns ``True`` on success. On failure the transaction is rolled back,
    the error is reported and ``False`` is returned.
    """
    statement = "CREATE INDEX IF NOT EXISTS {name} ON words ({columns})"
    cursor = conn.cursor()
    try:
        for index_columns in INDEXES:
            cursor.execute(statement.format(
                name='_'.join(index_columns),
                columns=', '.join(index_columns)))
        conn.commit()
    except Exception as e:
        conn.rollback()
        error("Could not create indexes {}".format(e))
        return False
    return True
def insert_word(conn, word='', lemma='', tag='', gender='', plural=False, syllable_count=0, rhyme_end='', nature_score=0.0, has_nature_score=False):
    """Insert a single row into the ``words`` table and commit.

    conn: an open sqlite3 connection.
    The remaining keyword arguments map one-to-one onto the table columns.

    Returns ``True`` on success. On failure the transaction is rolled back,
    the error is reported and ``False`` is returned.
    """
    cur = conn.cursor()
    try:
        cur.execute("""INSERT INTO words(
word,
lemma,
tag,
gender,
plural,
syllable_count,
rhyme_end,
nature_score,
has_nature_score)
VALUES(
:word,
:lemma,
:tag,
:gender,
:plural,
:syllable_count,
:rhyme_end,
:nature_score,
:has_nature_score
)
""", {
            'word': word,
            'lemma': lemma,
            'tag': tag,
            'gender': gender,
            'plural': plural,
            'syllable_count': syllable_count,
            'rhyme_end': rhyme_end,
            'nature_score': nature_score,
            # The key must match the :has_nature_score placeholder exactly;
            # a misspelled key leaves the placeholder unbound and the insert fails.
            'has_nature_score': has_nature_score
        })
        conn.commit()
        return True
    except Exception as e:
        conn.rollback()
        error('Could not insert word {}'.format(e))
        return False
def insert_many_words (conn, words=[]):
    """Insert several rows into the ``words`` table in one batch and commit.

    conn: an open sqlite3 connection.
    words: sequence of mappings, each providing a value for every named
        placeholder of the statement below.

    Returns ``True`` on success. On failure the transaction is rolled back,
    the error is reported and ``False`` is returned.
    """
    statement = """INSERT INTO words(
word,
lemma,
tag,
gender,
plural,
syllable_count,
rhyme_end,
nature_score,
has_nature_score)
VALUES(
:word,
:lemma,
:tag,
:gender,
:plural,
:syllable_count,
:rhyme_end,
:nature_score,
:has_nature_score
)
"""
    cursor = conn.cursor()
    try:
        cursor.executemany(statement, words)
        conn.commit()
    except Exception as e:
        conn.rollback()
        error('Could not insert word {}'.format(e))
        return False
    return True
def make_where_part(parameters=None, allowed_columns=None):
    """Build a parameterised SQL ``WHERE`` clause from a list of conditions.

    parameters: iterable of ``(column, value)`` or ``(column, compare, value)``
        tuples. ``compare`` must be one of ``<``, ``<=``, ``=``, ``>=``, ``>``,
        ``<>``, ``!=`` or ``BETWEEN``; anything else falls back to ``=``.
        For ``BETWEEN`` the value is either the string ``'low AND high'`` or a
        two-item sequence ``(low, high)``.
    allowed_columns: column names that may be filtered on; conditions on any
        other column are ignored.

    Returns ``(where_part, bound_values)``. ``where_part`` is the clause
    including the ``WHERE`` keyword, or ``None`` when no condition applies.
    ``bound_values`` maps every named placeholder used in the clause to its
    value and is meant to be passed to ``cursor.execute`` with the clause.

    Raises ``ValueError`` when a ``BETWEEN`` value does not describe exactly
    two bounds.
    """
    allowed_compares = ('<', '<=', '=', '>=', '>', '<>', '!=', 'BETWEEN')
    default_compare = '='
    allowed_columns = allowed_columns or ()
    filtered_parameters = {}
    where_chunks = []

    def bind(column, value):
        # Every value gets its own placeholder. A second condition on the same
        # column is suffixed (_2, _3, ...) so it cannot overwrite the first.
        name = column
        counter = 1
        while name in filtered_parameters:
            counter += 1
            name = '{}_{}'.format(column, counter)
        filtered_parameters[name] = value
        return ':' + name

    def bound(raw):
        # Bounds given as text are bound as numbers when they parse as such;
        # other text is bound as a string, without surrounding SQL quotes.
        if not isinstance(raw, str):
            return raw
        text = raw.strip()
        try:
            return float(text)
        except ValueError:
            if len(text) >= 2 and text[0] == text[-1] == "'":
                return text[1:-1]
            return text

    def split_bounds(value):
        if isinstance(value, str):
            low, separator, high = value.partition(' AND ')
            if not separator:
                raise ValueError("BETWEEN expects 'low AND high', got {!r}".format(value))
            return bound(low), bound(high)
        bounds = tuple(value)
        if len(bounds) != 2:
            raise ValueError("BETWEEN expects two bounds, got {!r}".format(value))
        return bound(bounds[0]), bound(bounds[1])

    for row in parameters or ():
        column = row[0]
        if column not in allowed_columns:
            continue

        if len(row) > 2:
            compare, value = row[1], row[2]
            if compare not in allowed_compares:
                compare = default_compare
        else:
            compare, value = default_compare, row[1]

        if compare == 'BETWEEN':
            # Both bounds are bound as parameters and never pasted into the SQL.
            low, high = split_bounds(value)
            where_chunks.append('{} BETWEEN {} AND {}'.format(
                column, bind(column, low), bind(column, high)))
        else:
            where_chunks.append('{} {} {}'.format(column, compare, bind(column, value)))

    if where_chunks:
        where_part = 'WHERE ' + ' AND '.join(where_chunks)
    else:
        where_part = None
    return (where_part, filtered_parameters)
"""
'word': ('<', 1)
"""
def find_words(conn, parameters=None, limit=None):
    """Return rows of the ``words`` table matching ``parameters``, in random order.

    conn: an open sqlite3 connection.
    parameters: list of condition tuples as accepted by ``make_where_part``.
    limit: optional maximum number of rows; must be convertible to ``int``.

    Returns the list of matching rows, or ``False`` when the query fails (the
    error is reported and the transaction rolled back).
    """
    try:
        allowed_columns = ['word', 'lemma', 'tag', 'gender', 'plural', 'syllable_count', 'rhyme_end', 'nature_score', 'has_nature_score']
        where_part, bound_values = make_where_part(parameters or [], allowed_columns)
        sql = 'SELECT * FROM words'
        if where_part:
            sql += ' ' + where_part
        sql += ' ORDER BY RANDOM()'
        if limit:
            # Coerce to int so only a number can ever reach the SQL text.
            sql += ' LIMIT {:d}'.format(int(limit))
        cur = conn.cursor()
        cur.execute(sql, bound_values)
        return cur.fetchall()
    except Exception as e:
        conn.rollback()
        error('Could not find words {}'.format(e))
        return False
def find_one_word(conn, parameters=None):
    """Return one random row of the ``words`` table matching ``parameters``.

    conn: an open sqlite3 connection.
    parameters: list of condition tuples as accepted by ``make_where_part``.

    Returns the row, ``None`` when nothing matches, or ``False`` when the
    query fails (the error is reported and the transaction rolled back).
    """
    try:
        # Same whitelist as find_words, so conditions on gender/plural are
        # applied instead of being silently ignored.
        allowed_columns = ['word', 'lemma', 'tag', 'gender', 'plural', 'syllable_count', 'rhyme_end', 'nature_score', 'has_nature_score']
        where_part, bound_values = make_where_part(parameters or [], allowed_columns)
        sql = 'SELECT * FROM words'
        if where_part:
            sql += ' ' + where_part
        # Only one row is fetched, so ask the database for a single row.
        sql += ' ORDER BY RANDOM() LIMIT 1'
        cur = conn.cursor()
        cur.execute(sql, bound_values)
        return cur.fetchone()
    except Exception as e:
        conn.rollback()
        error('Could not find words {}'.format(e))
        return False
\ No newline at end of file
"""
-- word: str
-- lemma: str
-- tag: str
-- syllable_count: int
-- rhyme_end: str
-- nature_score: float
"""
import csv
from db import connect, create_table, create_indexes, insert_many_words, drop_table_and_indexes
from nature_classifier import predict
def extract_rhyme(syllables):
    """Extract the rhyming end of a hyphen-separated syllable string.

    Only the last syllable is considered. The rhyme is the part of that
    syllable starting at its last vowel symbol; when the syllable contains no
    vowel symbol the whole syllable is returned.

    syllables: syllables separated by ``-``.

    Returns the rhyme as a string (empty for empty input).
    """
    # TODO: make the extraction more intelligent.
    vowels = frozenset(['a', 'e', 'i', 'o', 'u', 'y', 'A', 'E', 'I', 'O', 'U', 'Y', '1', '2', '5', '8', '9', '0', '°', '@', '§'])
    last_syllable = syllables.split('-')[-1]

    # Scan backwards for the last vowel symbol of the syllable.
    for position in range(len(last_syllable) - 1, -1, -1):
        if last_syllable[position] in vowels:
            return last_syllable[position:]

    # No vowel symbol: return the syllable in its original order, not in the
    # reversed order in which it was scanned.
    return last_syllable
# Rebuild the words table from ../data/Lexique.csv.
# newline='' lets the csv module do its own newline handling, as the csv
# documentation requires for files passed to csv.reader.
with open('../data/Lexique.csv', 'r', newline='') as source:
    reader = csv.reader(source)
    conn = connect()
    drop_table_and_indexes(conn)

    if create_table(conn) and create_indexes(conn):
        words = []
        unscored = 0

        for idx, row in enumerate(reader):
            # The first row is skipped.
            if idx > 0:
                # Progress output every 1000 rows.
                if idx % 1000 == 0:
                    print(idx)

                # Score the word itself first, then fall back on row[2].
                nature_score = predict(row[0])
                has_nature_score = True

                if nature_score is None:
                    nature_score = predict(row[2])

                    if nature_score is None:
                        nature_score = 0
                        has_nature_score = False
                        unscored += 1

                # NOTE(review): the fallback above scores row[2] while 'lemma'
                # below stores row[1] -- confirm which CSV column holds the lemma.
                words.append({
                    'word': row[0],
                    'lemma': row[1],
                    'tag': row[3],
                    'gender': row[4],
                    'plural': row[5] == 'p',
                    'syllable_count': row[23],
                    'rhyme_end': extract_rhyme(row[22]),
                    'nature_score': nature_score,
                    'has_nature_score': has_nature_score
                })

        print('Adding {} words'.format(len(words)))
        print('Unscored words: {}'.format(unscored))
        insert_many_words(conn, words)
\ No newline at end of file
from db import connect, find_one_word, find_words
from sonnets import generate_queneau_sonnet
import re
import random
import sys
"""
word CHARACTER(50),
lemma CHARACTER(50),
tag CHARACTER(5),
syllable_count INTEGER,
rhyme_end CHARACTER(5),
nature_score FLOAT,
has_nature_score BOOLEAN
"""
# Database connection opened once at import time and shared by the lookups below.
conn = connect()
def load_queneau_replacement_words():
    """Read the replacement candidates from ``../data/queneau_words.txt``.

    Returns a list with one entry per line of the file, each stripped of
    leading and trailing whitespace.
    """
    with open('../data/queneau_words.txt', 'r') as handle:
        return list(map(str.strip, handle))
def find_replacement(word, should_rhyme=False, nature_score=None):
    """Look up at most ten nouns that could stand in for ``word``.

    ``word`` is first looked up as a noun (tag ``NOM``). Candidates share its
    gender, plural flag and syllable count, and never equal ``word`` itself.

    word: the word to replace.
    should_rhyme: when true, candidates must also have the same ``rhyme_end``
        as ``word``.
    nature_score: optional ``(low, high)`` pair; when given, candidates must
        have a nature score and it must lie between the two bounds.

    Returns a list of candidate words, or ``None`` when ``word`` is not found
    or the search yields nothing.
    """
    word_data = find_one_word(conn, [('tag', 'NOM'), ('word', word)])
    if not word_data:
        return None

    # Same tag, gender, number and syllable count as the original word.
    criteria = [('tag', 'NOM')]
    for column in ('gender', 'plural', 'syllable_count'):
        criteria.append((column, word_data[column]))

    if nature_score:
        low, high = nature_score[0], nature_score[1]
        criteria.append(('nature_score', 'BETWEEN', '{} AND {}'.format(low, high)))
        criteria.append(('has_nature_score', 1))

    if should_rhyme:
        criteria.append(('rhyme_end', word_data['rhyme_end']))

    criteria.append(('word', '!=', word))

    matches = find_words(conn, criteria, limit=10)
    if not matches:
        return None
    return [match['word'] for match in matches[:10]]
# Words that are eligible for replacement, loaded once at import time.
words_to_replace = load_queneau_replacement_words()
def _search_replacements(word, should_rhyme, score):
    """Return replacement candidates for ``word``, relaxing constraints step by step."""
    replacements = None

    # First try the nature selection: widen the accepted score range around
    # ``score`` by 0.25 per attempt until something matches or the window
    # grows past 10.
    window = 0.25
    while not replacements and window <= 10:
        replacements = find_replacement(
            word, should_rhyme,
            nature_score=(score - window, score + window))
        window += .25

    # Nothing within that range: select on syllable count and rhyme
    # preference only.
    if not replacements:
        replacements = find_replacement(word, should_rhyme)

    # Still nothing while a rhyme was requested: drop the rhyme preference.
    if not replacements and should_rhyme:
        replacements = find_replacement(word, should_rhyme=False)

    # Finally give up.
    return replacements or ['NO REPLACEMENTS']


def generate_replaced_sonnet(score):
    """Generate a Queneau sonnet and collect replacements for its candidate words.

    score: nature score around which replacements are searched first.

    Returns a list of lines. Each line is a list of tuples in reading order:
    ``(word,)`` for a word that is kept, ``(word, replacements)`` for a word
    listed in ``words_to_replace``.
    """
    new_sonnet = []

    for line in generate_queneau_sonnet():
        rebuilt = []
        # The line is walked from its end; the rhyme requirement is dropped
        # after the first replaced word or the first token starting with a
        # word character.
        should_rhyme = True

        for word in reversed(line.split(' ')):
            if word in words_to_replace:
                rebuilt.append((word, _search_replacements(word, should_rhyme, score)))
                should_rhyme = False
            else:
                if re.match(r'\w+', word):
                    should_rhyme = False
                rebuilt.append((word,))

        # Restore reading order.
        rebuilt.reverse()
        new_sonnet.append(rebuilt)

    return new_sonnet
# for debug take one line, see how many options there are
# what is the correct pos-tag?
if __name__ == '__main__':
    # The optional first command-line argument is the target nature score.
    if len(sys.argv) > 1:
        score = float(sys.argv[1])
    else:
        score = 0.0

    new_sonnet = generate_replaced_sonnet(score)
    print('A generated sonnet with nature score: {}'.format(score))

    for line in new_sonnet:
        output = []
        for word in line:
            if len(word) > 1:
                # (original, candidates): print one random candidate followed
                # by the original word in parentheses.
                output.append('{replacement} ({old})'.format(old=word[0], replacement=random.choice(word[1])))
            else:
                output.append(word[0])
        print(' '.join(output))
\ No newline at end of file
import os.path
# Debug switch.
DEBUG = True
# Directory that contains this settings file (symlinks resolved).
BASEPATH = os.path.dirname(os.path.realpath(__file__))
# Path of the SQLite database file, expressed relative to BASEPATH.
DATABASE = os.path.join(BASEPATH, '../data/mariemont.db')
\ No newline at end of file
......@@ -34,6 +34,9 @@ def generate_sonnet (verses):
# Convenience function. Inefficient as it
# reads the source text with every call!
def generate_queneau_sonnet():
    """
    Generates a sonnet, returns a list of lines
    """
    return generate_sonnet(load_verses('sonnets_queneau.txt'))
......
from settings import DEBUG
def debug(text):
    """Print ``text`` prefixed with ``Debug : ``, but only when ``DEBUG`` is truthy."""
    if not DEBUG:
        return
    message = "Debug : {}".format(text)
    print(message)
def log(text):
    """Print ``text`` prefixed with ``Log : ``."""
    message = "Log : {}".format(text)
    print(message)
def error(text):
    """Print ``text`` prefixed with ``Error : ``."""
    message = "Error : {}".format(text)
    print(message)
\ No newline at end of file