Commit 2458ba01 authored by manetta's avatar manetta
Browse files

adding an Algologs folder with scripts for the session tomorrow

parent 18a4a94e
import nltk
# Toy corpus: every sentence is split on whitespace into words.
sentences = [
    "I like deep learning",
    "I like NLP",
    "I enjoy flying",
]

# Count every pair of neighbouring words. Each pair is sorted before it is
# used as a key, so ("like", "I") and ("I", "like") share one counter and the
# resulting matrix is symmetric.
counter = dict()
vocabulary = set()  # a set de-duplicates in O(1) instead of scanning a list
for sentence in sentences:
    for ngram in nltk.bigrams(sentence.split()):
        ngram_sorted = tuple(sorted(ngram))
        counter[ngram_sorted] = counter.get(ngram_sorted, 0) + 1
        vocabulary.update(ngram_sorted)

# Alphabetically ordered vocabulary; it labels both the rows and the columns.
# Only words that occur in at least one bigram end up in it.
words = sorted(vocabulary)

# matrix[i][j] is the number of times words[i] and words[j] were neighbours.
matrix = [
    [counter.get(tuple(sorted((word1, word2))), 0) for word2 in words]
    for word1 in words
]

# 'Expanded' version of the comprehension above:
#
# matrix = []
# for word1 in words:
#     row = []
#     for word2 in words:
#         key = tuple(sorted([word1, word2]))
#         if key in counter:
#             row.append(counter[key])
#         else:
#             row.append(0)
#     matrix.append(row)

# Header row: an empty 10-character corner cell followed by the column labels.
print("{: >10}".format('') + ' ' + ''.join(["{: <10}".format(word) for word in words]))
# One line per word: right-aligned row label, then the left-aligned counts.
for k, word in enumerate(words):
    print("{: >10}".format(word) + ' ' + ''.join(["{: <10}".format(c) for c in matrix[k]]))
\ No newline at end of file
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
import nltk
la = np.linalg  # shorthand kept from the original script; not used below

sentences = [
    "Vandaag hebben we neural networks bestudeerd",
    "Cristina was er ook, en Gijs niet",
    "vandaag was het deep",
    "net zo deep als deep learning",
]
# sentences = ["I like deep learning.", "I like NLP.", "I enjoy flying."]

# Unique words of the text.
# The vocabulary is tokenised exactly like the bigrams further down
# (lowercase, split on any whitespace); otherwise a bigram could contain a
# token that is missing from the vocabulary and the matrix lookup would fail.
prematrix = set()
for sentence in sentences:
    print('> sentence: ', sentence)
    prematrix.update(sentence.lower().split())
print('\n> prematrix: \n', prematrix)

# Order the set and turn it into a list: position in pre2 == matrix index.
pre2 = sorted(prematrix)
print('\n> pre2: \n', pre2, '\n')

# Create the bigrams (pairs of neighbouring words) of every sentence.
bigrams = []
for sentence in sentences:
    for b in nltk.bigrams(sentence.lower().split()):
        print('> bigram:', b)
        bigrams.append(b)
print('\n> bigrams: \n', bigrams)

# Create the co-occurrence matrix.
# Start from a square matrix of zeros, one row and one column per word.
# `np.int` no longer exists in current NumPy; the builtin `int` is what it
# used to alias.
X = np.zeros((len(pre2), len(pre2)), dtype=int)
print('\n> co-occurence matrix (empty): \n', X)

# Word -> matrix index, built once instead of calling pre2.index() repeatedly.
position = {word: index for index, word in enumerate(pre2)}

# For each bigram add one in both directions, which keeps the matrix symmetric.
# A bigram of two identical words therefore adds two to its diagonal cell.
for first, second in bigrams:
    row, col = position[first], position[second]
    X[row, col] += 1
    X[col, row] += 1
print('\n> co-occurence matrix: \n', X)
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
{
",": {
"fellow": {
"freq": 2,
"sentences": [
"For a long time I could not conceive how one man could go forth to murder his fellow , or even why there were laws and governments ; but when I heard details of vice and bloodshed , my wonder ceased and I turned away with disgust and loathing .",
"Satan had his companions , fellow devils , to admire and encourage him , but I am solitary and abhorred ."
]
},
"human": {
"freq": 3,
"sentences": [
"It was during an access of this kind that I suddenly left my home , and bending my steps towards the near Alpine valleys , sought in the magnificence , the eternity of such scenes , to forget myself and my ephemeral , because human , sorrows .",
"But again when I reflected that they had spurned and deserted me , anger returned , a rage of anger , and unable to injure anything human , I turned my fury towards inanimate objects .",
"The picture I present to you is peaceful and human , and you must feel that you could deny it only in the wantonness of power and cruelty ."
]
}
},
"a": {
"fellow": {
"freq": 4,
"sentences": [
"I spoke of my desire of finding a friend , of my thirst for a more intimate sympathy with a fellow mind than had ever fallen to my lot , and expressed my conviction that a man could boast of little happiness who did not enjoy this blessing .",
"`` So saying , he stepped aside and wrote down a list of several books treating of natural philosophy which he desired me to procure , and dismissed me after mentioning that in the beginning of the following week he intended to commence a course of lectures upon natural philosophy in its general relations , and that M. Waldman , a fellow professor , would lecture upon chemistry the alternate days that he omitted .",
"It may therefore be judged indecent in me to come forward on this occasion , but when I see a fellow creature about to perish through the cowardice of her pretended friends , I wish to be allowed to speak , that I may say what I know of her character .",
"Yet I seek not a fellow feeling in my misery ."
]
},
"human": {
"freq": 9,
"sentences": [
"Only one dog remained alive ; but there was a human being within it whom the sailors were persuading to enter the vessel .",
"It was with these feelings that I began the creation of a human being .",
"Justine , thus received in our family , learned the duties of a servant , a condition which , in our fortunate country , does not include the idea of ignorance and a sacrifice of the dignity of a human being .",
"I am poor and an exile , but it will afford me true pleasure to be in any way serviceable to a human creature .",
"I travelled only at night , fearful of encountering the visage of a human being .",
"I had saved a human being from destruction , and as a recompense I now writhed under the miserable pain of a wound which shattered the flesh and bone .",
"The remains of the half-finished creature , whom I had destroyed , lay scattered on the floor , and I almost felt as if I had mangled the living flesh of a human being .",
"My father still desired to delay our departure , fearful that I could not sustain the fatigues of a journey , for I was a shattered wreck -- the shadow of a human being .",
"Again there is a sound as of a human voice , but hoarser ; it comes from the cabin where the remains of Frankenstein still lie ."
]
}
},
"beings": {
"fellow": {
"freq": 4,
"sentences": [
"It was to be decided whether the result of my curiosity and lawless devices would cause the death of two of my fellow beings : one a smiling babe full of innocence and joy , the other far more dreadfully murdered , with every aggravation of infamy that could make the murder memorable in horror .",
"I had begun life with benevolent intentions and thirsted for the moment when I should put them in practice and make myself useful to my fellow beings .",
"These bleak skies I hail , for they are kinder to me than your fellow beings .",
"They were my brethren , my fellow beings , and I felt attracted even to the most repulsive among them , as to creatures of an angelic nature and celestial mechanism ."
]
},
"human": {
"freq": 6,
"sentences": [
"I had gazed upon the fortifications and impediments that seemed to keep human beings from entering the citadel of nature , and rashly and ignorantly I had repined .",
"I had often , when at home , thought it hard to remain during my youth cooped up in one place and had longed to enter the world and take my station among other human beings .",
"The picture appeared a vast and dim scene of evil , and I foresaw obscurely that I was destined to become the most wretched of human beings .",
"I saw few human beings besides them , and if any other happened to enter the cottage , their harsh manners and rude gait only enhanced to me the superior accomplishments of my friends .",
"The sleep into which I now sank refreshed me ; and when I awoke , I again felt as if I belonged to a race of human beings like myself , and I began to reflect upon what had passed with greater composure ; yet still the words of the fiend rang in my ears like a death-knell ; they appeared like a dream , yet distinct and oppressive as a reality .",
"In other places human beings were seldom seen , and I generally subsisted on the wild animals that crossed my path ."
]
}
},
"creature": {
"fellow": {
"freq": 1,
"sentences": [
"It may therefore be judged indecent in me to come forward on this occasion , but when I see a fellow creature about to perish through the cowardice of her pretended friends , I wish to be allowed to speak , that I may say what I know of her character ."
]
},
"human": {
"freq": 2,
"sentences": [
"I am poor and an exile , but it will afford me true pleasure to be in any way serviceable to a human creature .",
"It is true , he seldom came to see me , for although he ardently desired to relieve the sufferings of every human creature , he did not wish to be present at the agonies and miserable ravings of a murderer ."
]
}
},
"creatures": {
"fellow": {
"freq": 11,
"sentences": [
"Every night I was oppressed by a slow fever , and I became nervous to a most painful degree ; the fall of a leaf startled me , and I shunned my fellow creatures as if I had been guilty of a crime .",
"You , my creator , abhor me ; what hope can I gather from your fellow creatures , who owe me nothing ?",
"On you it rests , whether I quit forever the neighbourhood of man and lead a harmless life , or become the scourge of your fellow creatures and the author of your own speedy ruin .",
"I learned that the possessions most esteemed by your fellow creatures were high and unsullied descent united with riches .",
"You raise me from the dust by this kindness ; and I trust that , by your aid , I shall not be driven from the society and sympathy of your fellow creatures .",
"After a long pause of reflection I concluded that the justice due both to him and my fellow creatures demanded of me that I should comply with his request .",
"I feared to wander from the sight of my fellow creatures lest when alone he should come to claim his companion .",
"I left the house , the horrid scene of the last night 's contention , and walked on the beach of the sea , which I almost regarded as an insuperable barrier between me and my fellow creatures ; nay , a wish that such should prove the fact stole across me .",
"I felt as if I was about the commission of a dreadful crime and avoided with shuddering anxiety any encounter with my fellow creatures .",
"This sentiment of the worth of my nature supported me when others would have been oppressed , for I deemed it criminal to throw away in useless grief those talents that might be useful to my fellow creatures .",
"If I were engaged in any high undertaking or design , fraught with extensive utility to my fellow creatures , then could I live to fulfil it ."
]
},
"human": {
"freq": 3,
"sentences": [
"I once had a friend , the most noble of human creatures , and am entitled , therefore , to judge respecting friendship .",
"During all that period she appeared to me the most amiable and benevolent of human creatures .",
"Everybody believed that poor girl to be guilty ; and if she could have committed the crime for which she suffered , assuredly she would have been the most depraved of human creatures ."
]
}
},
"mind": {
"fellow": {
"freq": 1,
"sentences": [
"I spoke of my desire of finding a friend , of my thirst for a more intimate sympathy with a fellow mind than had ever fallen to my lot , and expressed my conviction that a man could boast of little happiness who did not enjoy this blessing ."
]
},
"human": {
"freq": 4,
"sentences": [
"If the study to which you apply yourself has a tendency to weaken your affections and to destroy your taste for those simple pleasures in which no alloy can possibly mix , then that study is certainly unlawful , that is to say , not befitting the human mind .",
"Chapter 9 Nothing is more painful to the human mind than , after the feelings have been worked up by a quick succession of events , the dead calmness of inaction and certainty which follows and deprives the soul both of hope and fear .",
"My father , who was watching over me , perceiving my restlessness , awoke me ; the dashing waves were around , the cloudy sky above , the fiend was not here : a sense of security , a feeling that a truce was established between the present hour and the irresistible , disastrous future imparted to me a kind of calm forgetfulness , of which the human mind is by its structure peculiarly susceptible .",
"Nothing is so painful to the human mind as a great and sudden change ."
]
}
},
"my": {
"fellow": {
"freq": 12,
"sentences": [
"He had endeavoured to persuade his father to permit him to accompany me and to become my fellow student , but in vain .",
"Every night I was oppressed by a slow fever , and I became nervous to a most painful degree ; the fall of a leaf startled me , and I shunned my fellow creatures as if I had been guilty of a crime .",
"It was to be decided whether the result of my curiosity and lawless devices would cause the death of two of my fellow beings : one a smiling babe full of innocence and joy , the other far more dreadfully murdered , with every aggravation of infamy that could make the murder memorable in horror .",
"I had begun life with benevolent intentions and thirsted for the moment when I should put them in practice and make myself useful to my fellow beings .",
"After a long pause of reflection I concluded that the justice due both to him and my fellow creatures demanded of me that I should comply with his request .",
"I saw an insurmountable barrier placed between me and my fellow men ; this barrier was sealed with the blood of William and Justine , and to reflect on the events connected with those names filled my soul with anguish .",
"I feared to wander from the sight of my fellow creatures lest when alone he should come to claim his companion .",
"I left the house , the horrid scene of the last night 's contention , and walked on the beach of the sea , which I almost regarded as an insuperable barrier between me and my fellow creatures ; nay , a wish that such should prove the fact stole across me .",
"I felt as if I was about the commission of a dreadful crime and avoided with shuddering anxiety any encounter with my fellow creatures .",
"They were my brethren , my fellow beings , and I felt attracted even to the most repulsive among them , as to creatures of an angelic nature and celestial mechanism .",
"This sentiment of the worth of my nature supported me when others would have been oppressed , for I deemed it criminal to throw away in useless grief those talents that might be useful to my fellow creatures .",
"If I were engaged in any high undertaking or design , fraught with extensive utility to my fellow creatures , then could I live to fulfil it ."
]
},
"human": {
"freq": 2,
"sentences": [
"The dissecting room and the slaughter-house furnished many of my materials ; and often did my human nature turn with loathing from my occupation , whilst , still urged on by an eagerness which perpetually increased , I brought my work near to a conclusion .",
"`` Night quickly shut in , but to my extreme wonder , I found that the cottagers had a means of prolonging light by the use of tapers , and was delighted to find that the setting of the sun did not put an end to the pleasure I experienced in watching my human neighbours ."
]
}
},
"the": {
"fellow": {
"freq": 1,
"sentences": [
"`` D -- n the fellow ! ''"
]
},
"human": {
"freq": 11,
"sentences": [
"Wealth was an inferior object , but what glory would attend the discovery if I could banish disease from the human frame and render man invulnerable to any but a violent death !",
"One of the phenomena which had peculiarly attracted my attention was the structure of the human frame , and , indeed , any animal endued with life .",
"I became acquainted with the science of anatomy , but this was not sufficient ; I must also observe the natural decay and corruption of the human body .",
"My attention was fixed upon every object the most insupportable to the delicacy of the human feelings .",
"I collected bones from charnel-houses and disturbed , with profane fingers , the tremendous secrets of the human frame .",
"If the study to which you apply yourself has a tendency to weaken your affections and to destroy your taste for those simple pleasures in which no alloy can possibly mix , then that study is certainly unlawful , that is to say , not befitting the human mind .",
"Chapter 9 Nothing is more painful to the human mind than , after the feelings have been worked up by a quick succession of events , the dead calmness of inaction and certainty which follows and deprives the soul both of hope and fear .",
"But on you only had I any claim for pity and redress , and from you I determined to seek that justice which I vainly attempted to gain from any other being that wore the human form .",
"But that can not be ; the human senses are insurmountable barriers to our union .",
"My father , who was watching over me , perceiving my restlessness , awoke me ; the dashing waves were around , the cloudy sky above , the fiend was not here : a sense of security , a feeling that a truce was established between the present hour and the irresistible , disastrous future imparted to me a kind of calm forgetfulness , of which the human mind is by its structure peculiarly susceptible .",
"Nothing is so painful to the human mind as a great and sudden change ."
]
}
}
}
\ No newline at end of file
import os
from gensim.models import Word2Vec
# based on the following tutorial: https://rare-technologies.com/word2vec-tutorial/
class MySentences(object):
    """Iterable that streams tokenised lines from every file in a directory.

    Each iteration re-reads the directory, so the object can be iterated
    more than once.

    Args:
        dirname: path of the directory whose entries are opened and read.
    """

    def __init__(self, dirname):
        self.dirname = dirname

    def __iter__(self):
        """Yield each line of each file as a list of whitespace-separated words."""
        for fname in os.listdir(self.dirname):
            # The context manager closes each file as soon as it is exhausted
            # (or when the generator is closed) instead of leaking the handle.
            with open(os.path.join(self.dirname, fname)) as handle:
                for line in handle:
                    # more text-processing steps can be added here
                    # for example: "convert to unicode, lowercase, remove numbers, extract named entities…"
                    words = line.split()
                    print(words)
                    yield words  # a list of words
# Stream the training sentences from every file found in ./input/test.
sentences = MySentences('./input/test')
# Train a Word2Vec model on those sentences.
# NOTE(review): the keyword `size` is the pre-4.0 gensim spelling; gensim 4.x
# appears to expect `vector_size` instead. Confirm against the installed version.
model = Word2Vec(sentences, size=100, window=5, min_count=5, workers=4)
# Alternative, minimal configuration:
# model = Word2Vec(sentences, size=1, window=1, min_count=1, workers=1)
print(model)
import sys
from nltk import ngrams
# The whole input text is read from stdin, for example:
# text = 'this is a test sentence for now'
text = sys.stdin.read()

# out = ngrams(text, 3)
n = 3
# `text` is a plain string, so every slice is a run of n *characters*.
# An input shorter than n characters gives an empty list.
out = [text[i:i+n] for i in range(len(text)-n+1)]

# print() written as a function call runs on both Python 2 and Python 3.
print(list(out))

# The same list is then written once more, as text and without a newline.
out = str(out)
sys.stdout.write(out)

# For the example sentence above the result starts with:
# ['thi', 'his', 'is ', 's i', ' is', 'is ', ...]
# Word-level n-grams such as ('this', 'is', 'a') need a tokenised list of
# words as input instead of the raw string.
import sys
# --- input -----------------------------------------------------------------
# The complete text is taken from stdin, for example:
# text = 'this is a test sentence for now'
text = sys.stdin.read()

# --- tokenise --------------------------------------------------------------
from nltk.tokenize import TreebankWordTokenizer
tokenizer = TreebankWordTokenizer()
tokens = tokenizer.tokenize(text)

# --- n-grams ---------------------------------------------------------------
# Hand-rolled sliding window (instead of nltk.ngrams): each n-gram is a
# *list* holding n consecutive tokens.
n = 3
windows = []
for start in range(len(tokens) - n + 1):
    windows.append(tokens[start:start + n])
# For the example sentence:
# [['this', 'is', 'a'], ['is', 'a', 'test'], ['a', 'test', 'sentence'],
#  ['test', 'sentence', 'for'], ['sentence', 'for', 'now']]

# --- output ----------------------------------------------------------------
# The textual form of the list goes to stdout, without a trailing newline.
out = str(windows)
sys.stdout.write(out)
\ No newline at end of file
import sys
from nltk.tokenize import TreebankWordTokenizer
# The whole input text is read from stdin, for example:
# text = 'this is a test sentence for now'
text = sys.stdin.read()

# Split the text into tokens with NLTK's Treebank tokenizer.
tokenizer = TreebankWordTokenizer()
out = tokenizer.tokenize(text)

# The token list is emitted twice: once without a trailing newline, and once
# more through print(), which adds the newline.
out = str(out)
sys.stdout.write(out)
# print() written as a function call runs on both Python 2 and Python 3.
print(out)
\ No newline at end of file
import sys
import nltk
from nltk.tokenize import TreebankWordTokenizer, sent_tokenize
from nltk import ngrams
from collections import Counter
import json
# >>> input
# The corpus is read from a fixed file rather than from stdin:
# text = sys.stdin.read()
# The context manager closes the file handle as soon as the text is read.
with open('input/frankenstein_gutenberg_plain.txt', 'r') as corpus:
    text = corpus.read()

# >>> tokenizer
# First split the text into sentences, then every sentence into tokens.
sentences = sent_tokenize(text)
# Debug output goes to stderr so that stdout carries nothing but the JSON
# document written at the end of the script.
sys.stderr.write(str(sentences) + '\n')

tokenizer = TreebankWordTokenizer()
data = [tokenizer.tokenize(sentence) for sentence in sentences]
# data looks like:
# [['this','is','a','sentence'], ['this','is','another','sentence']]

# >>> ngrams
# Sliding window of n tokens inside each sentence; windows never cross a
# sentence boundary.
# NOTE: this rebinds the name `ngrams` imported from nltk at the top of the file.
n = 3
ngrams = []
for sentence_tokens in data:
    ngrams.extend(
        sentence_tokens[i:i + n] for i in range(len(sentence_tokens) - n + 1)
    )
# e.g. [['this', 'is', 'a'], ['is', 'a', 'test'], ['a', 'test', 'sentence'], ...]

# >>> filter
# Keep the trigrams whose *middle* token is one of the target words and count
# every token of those trigrams (the target itself included).
targets = ['human', 'fellow']
ngrams1 = []
ngrams2 = []
counter1 = Counter()
counter2 = Counter()
for ngram in ngrams:
    if targets[0] == ngram[1]:
        ngrams1.append(ngram)
        counter1.update(ngram)
    if targets[1] == ngram[1]:
        ngrams2.append(ngram)
        counter2.update(ngram)


def _sentences_with_neighbour(token_lists, target, word):
    """Return the sentences in which `word` stands right next to `target`.

    Only the first occurrence of `target` in a sentence is inspected. A
    sentence is listed once for a match on the left and once more for a match
    on the right. Sentences are returned as their tokens joined by a space.
    """
    matches = []
    for sentence in token_lists:
        if target not in sentence:
            continue
        index = sentence.index(target)
        # Guard both ends: index - 1 must not wrap around to the last token
        # and index + 1 must not run past the end of the sentence.
        if index > 0 and sentence[index - 1] == word:
            matches.append(' '.join(sentence))
        if index + 1 < len(sentence) and sentence[index + 1] == word:
            matches.append(' '.join(sentence))
    return matches


# >>> compare
# For every word seen around the first target that was also seen around the
# second target, store both frequencies and the matching sentences.
out = {}
for word, value in counter1.items():
    # Skip the target words themselves. A plain membership test is needed
    # here: comparing the word with any(...) compares it with True, which
    # never filters anything out.
    if word in targets:
        continue
    # Keep only words that also appeared in the window of the second target.
    if word not in counter2:
        continue
    out[word] = {
        targets[0]: {
            'freq': value,
            'sentences': _sentences_with_neighbour(data, targets[0], word),
        },
        targets[1]: {
            'freq': counter2[word],
            'sentences': _sentences_with_neighbour(data, targets[1], word),
        },
    }

# Shape of the result:
# out = {
#     'of': {
#         'human': {
#             'freq': 23,
#             'sentences': ['sentence 1', 'sentence 2']
#         },
#         'fellow': {
#             'freq': 111,
#             'sentences': ['sentence 1', 'sentence 2']
#         }
#     }
# }

# >>> std.out
out = json.dumps(out, sort_keys=True, indent=4, separators=(',', ': '))
sys.stdout.write(out)
\ No newline at end of file
This diff is collapsed.
Tensor("Variable/read:0", shape=(5000, 128), dtype=float32, device=/device:CPU:0)
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
[]
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment