"""Script to generate graphs and wordlists of neighbouring words."""

from gensim.models import KeyedVectors as KV
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sys import argv
import argparse
import os.path
import os
# Directory that receives the per-word text files and plots.
EXPORT_DIR = 'export'


def build_parser():
    """Return the command-line parser for this script.

    ``data`` is the path passed to ``KV.load_word2vec_format``; ``topn`` is
    the number of neighbours requested per query and may be omitted.
    """
    parser = argparse.ArgumentParser(
        description='Find closest words in word2vec.')
    parser.add_argument('data', help="Path to the dataset")
    # nargs='?' is what makes the default usable: a plain positional is
    # always mandatory, so ``default=10`` on its own could never apply.
    parser.add_argument('topn', type=int, nargs='?', default=10,
                        help="Amount of words to look for")
    return parser


def _read_word(prompt):
    """Prompt for one line of input on both Python 2 and Python 3."""
    try:
        reader = raw_input  # Python 2
    except NameError:
        reader = input  # Python 3
    return reader(prompt)


def _neighbour_coordinates(model, embedding, words):
    """Return the 2-D coordinates of ``words`` as two parallel lists.

    ``embedding`` is the array returned by ``TSNE.fit_transform`` on
    ``model.syn0``; each word's row is looked up via ``model.vocab``.
    """
    # NOTE(review): ``vocab``/``syn0`` are the attribute names the original
    # script used; newer gensim releases may rename them -- confirm against
    # the installed version.
    rows = [model.vocab[word].index for word in words]
    xs = [embedding[row, 0] for row in rows]
    ys = [embedding[row, 1] for row in rows]
    return xs, ys


def _export_wordlist(search_word, words):
    """Write the neighbours of ``search_word`` to a text file, one per line."""
    path = os.path.join(EXPORT_DIR, 'txt-{}.txt'.format(search_word))
    with open(path, 'w') as handle:
        handle.write("Most similar words to: {}\n".format(search_word))
        for word in words:
            handle.write("{}\n".format(word))


def _export_plot(search_word, words, xs, ys):
    """Draw the labelled scatter plot for one query and save it to disk."""
    # A fresh figure per query keeps points from earlier searches from
    # piling up on the implicit global figure.
    plt.figure()
    plt.scatter(xs, ys)
    for label, x, y in zip(words, xs, ys):
        plt.annotate(label, xy=(x, y), xytext=(0, 0),
                     textcoords='offset points')
    # NOTE(review): the visible script built the plot but never saved or
    # showed it; this file name is a choice made here -- adjust if another
    # naming scheme is expected.
    plt.savefig(os.path.join(EXPORT_DIR, 'plot-{}.png'.format(search_word)))
    plt.close()


def main(arguments=None):
    """Load the vectors, project them with t-SNE and answer word queries.

    ``arguments`` is an optional list of command-line arguments; when it is
    ``None`` argparse falls back to ``sys.argv[1:]``.  The loop runs until
    end-of-input or Ctrl-C.
    """
    args = build_parser().parse_args(arguments)

    if not os.path.exists(EXPORT_DIR):
        os.makedirs(EXPORT_DIR)

    print('Loading data {}'.format(args.data))
    model = KV.load_word2vec_format(args.data, binary=False)

    print('Preparing TSNE')
    tsne = TSNE(n_components=2, random_state=0)
    embedding = tsne.fit_transform(model.syn0)

    while True:
        try:
            search_word = _read_word('Word: ').strip()
        except (EOFError, KeyboardInterrupt):
            # Leave the interactive loop quietly instead of with a traceback.
            break
        if not search_word:
            continue

        print('Searching most similar words')
        try:
            similar = model.similar_by_word(search_word, topn=args.topn)
        except KeyError:
            print('Word not in dataset?')
            continue
        print('Found: {}'.format(similar))

        words = [word for word, _ in similar]
        xs, ys = _neighbour_coordinates(model, embedding, words)

        print('Generating plot')
        _export_wordlist(search_word, words)
        _export_plot(search_word, words, xs, ys)


if __name__ == '__main__':
    main()
