Commit c7a6a25c by ana

Adding a script that reads pretrained GloVe word vectors

parent ceb595ba
#!/usr/bin/env python
# This script takes pretrained GloVe word vectors as input and prints each word, coloured and centred on screen, followed by its 300 dimensions
# Download wordvectors here: http://nlp.stanford.edu/data/glove.42B.300d.zip
# Copyright (C) 2016 Constant, Algolit
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details: <http://www.gnu.org/licenses/>.
import re
import colors
from colors import green
import time
# regular expression: a (possibly empty) run of ASCII letters
pattern = re.compile("[a-zA-Z]*")


def split_entry(line):
    """Split one line of the vector file into its word and the remainder.

    The word is the run of ASCII letters at the very start of ``line``;
    the remainder is everything after it (separator, numbers and the
    trailing newline are left untouched).

    Returns a ``(word, remainder)`` tuple, or ``None`` when the line does
    not start with a letter (empty line, punctuation token, number...).
    """
    # The pattern can match the empty string, so match() never returns None;
    # an empty match simply means "no leading word".
    word = pattern.match(line).group()
    if not word:
        return None
    return word, line[len(word):]


def main(path="data/glove.42B.300d.txt", pause=1):
    """Print every word of the vector file at ``path`` with its numbers.

    Each word is printed in green after a run of tabs, followed by the
    rest of its line, then the script sleeps ``pause`` seconds for
    legibility. Lines that do not start with a letter are skipped.
    """
    # 'with' guarantees the file is closed, even on Ctrl-C or an error
    with open(path, "r") as source:
        # Iterate the file directly. Calling source.readline() inside this
        # loop as well would consume a second line per iteration and
        # silently drop every other entry.
        for line in source:
            entry = split_entry(line)
            if entry is None:
                continue
            word, numbers = entry
            # print word in center of screen, in green
            print("\t\t\t\t\t\t\t\t\t", green(word), "\n")
            # print numbers
            print(numbers, "\n")
            # allow a pause for legibility
            time.sleep(pause)


if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment