Commit c5eaaf17 authored by ana's avatar ana
Browse files

alphabetic glovereader with glove sample text

parent 6f89dcea
This diff is collapsed.
#!/usr/bin/env python
# This script takes pretrained wordvectors using GloVe as input and prints each word with the 300 dimensions, using a colour and central position for the word
# Download the pretrained word vectors here: https://nlp.stanford.edu/projects/glove/
# Copyright (C) 2016 Constant, Algolit
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details: <http://www.gnu.org/licenses/>.
import re
import colors
from colors import green
import time
from collections import OrderedDict
import os
# Path of the pretrained GloVe vectors to display.
# For a quick trial run, point this at the small sample instead:
# "data/glovesample.txt"
SOURCE_PATH = "data/glove.42B.300d.txt"

# A word is the (possibly empty) run of ASCII letters at the start of a line.
pattern = re.compile("[a-zA-Z]*")


def split_entry(line):
    """Split one line of the vector file into its word and the remaining text.

    Args:
        line: One raw line as read from the vector file.

    Returns:
        A ``(word, rest)`` tuple, where ``word`` is the run of ASCII letters
        at the very start of ``line`` and ``rest`` is everything that follows
        it, untouched (leading separator and trailing newline included).
        ``None`` when the line does not start with a letter.
    """
    # The pattern accepts the empty string, so match() always succeeds;
    # an empty match simply means "no word at the start of this line".
    word = pattern.match(line).group(0)
    if not word:
        return None
    return word, line[len(word):]


def load_vectors(lines):
    """Collect every usable line into a dictionary ordered by word.

    Args:
        lines: Any iterable of text lines (an open file works).

    Returns:
        An ``OrderedDict`` mapping each word to the rest of its line, with
        the keys in ascending sorted order. When a word occurs on several
        lines, the last occurrence wins.
    """
    unsorted_vectors = {}
    # Iterate the lines directly. Calling readline() inside the loop as well
    # would consume a second line per iteration and silently drop half of
    # the file.
    for line in lines:
        entry = split_entry(line)
        if entry is None:
            continue
        word, numbers = entry
        unsorted_vectors[word] = numbers
    # Sort once, after everything has been read.
    return OrderedDict(sorted(unsorted_vectors.items()))


def show_vectors(vectors, pause=0.0):
    """Print each word and its numbers, clearing the terminal after each one.

    Args:
        vectors: Mapping of word to the text holding its numbers.
        pause: Seconds to wait before the screen is cleared. Defaults to 0
            (no pause).
    """
    for key, value in vectors.items():
        # print the word in the centre of the screen, in green
        print("\t\t\t\t\t\t\t\t\t", green(key), "\n")
        # print the numbers
        print(value, "\n")
        # optional pause for legibility
        # Note: showing the whole file in 8 hours would take 66 words
        # per second.
        if pause:
            time.sleep(pause)
        os.system('cls' if os.name == 'nt' else 'clear')


def main():
    """Read the vector file, sort it alphabetically and display it."""
    # The context manager closes the file even if reading fails.
    with open(SOURCE_PATH, "r") as source:
        abc = load_vectors(source)
    show_vectors(abc)


if __name__ == "__main__":
    main()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment