Commit 1ef3cb3a authored by ana

adding linear regression folder & scripts

parent 773f5ef4
creature,distance,score
interesting,0,9
noble,0,8
noble,1,8
unfashioned,0,3
human,0,5
helpless,0,2
pictured,2,5
guilty,5,1
yellow,3,5
alive,11,8
little,0,4
unsocial,3,3
happy,0,9
capable,0,9
pretended,8,3
happy,0,9
capable,0,9
human,0,5
unfit,0,2
bound,4,2
mild,5,6
most,6,6
fallen,11,2
favourable,3,6
desert,10,4
eternal,3,7
speedy,6,5
happy,8,9
young,0,5
fair,0,9
excellent,0,10
lovely,0,10
lovely,0,10
high,1,5
omnipotent,4,8
perfect,0,10
lovely,0,10
amiable,0,8
deserted,0,2
excellent,0,10
human,0,5
generous,30,9
little,0,4
beautiful,0,9
whole,7,8
another,1,5
fine,1,6
inaccessible,18,3
long,15,5
whole,8,6
tremulous,30,2
greater,13,8
future,2,5
insuperable,6,3
half finished,0,3
shuddering,6,2
human,0,5
angelic,2,7
every,0,5
purest,0,8
future,2,5
every,6,5
impenetrable,7,5
glorious,0,10
useful,3,8
extensive,4,7
rational,0,5
first,0,6
same,0,5
DISTANCE,VALUEADJ
0,9
0,8
1,8
0,3
0,5
0,2
2,5
5,1
3,5
11,8
0,4
3,3
0,9
0,9
8,3
0,9
0,9
0,5
0,2
4,2
5,6
6,6
11,2
3,6
10,4
3,7
6,5
8,9
0,5
0,9
0,10
0,10
0,10
1,5
4,8
0,10
0,10
0,8
0,2
0,10
0,5
30,9
0,4
0,9
7,8
1,5
1,6
18,3
15,5
8,6
30,2
13,8
2,5
6,3
0,3
6,2
0,5
2,7
0,5
0,8
2,5
6,5
7,5
0,10
3,8
4,7
0,5
0,6
0,5
ADJECTIVE,VALUEADJ,DISTANCE
miserable,0,0
more,7,4
poor,1,11
unremitting,6,14
last,5,13
daily,6,3
each,5,2
abhorred,0,0
bitterest,1,8
all,5,7
hideous,1,1
detestable,0,0
ugly,3,0
hideous,1,0
true,9,3
deadly,1,20
fond,9,7
remote,4,16
firm,5,9
sanguinary,1,3
tormented,2,9
clouded,4,3
dearest,10,9
inevitable,3,5
magic,8,2
fiendish,1,7
miserable,1,5
mistaken,3,16
gigantic,9,0
greater,8,8
connected,8,2
wild,9,3
VALUEADJ,DISTANCE
0,0
7,4
1,11
6,14
5,13
6,3
5,2
0,0
1,8
5,7
1,1
0,0
3,0
1,0
9,3
1,20
9,7
4,16
5,9
1,3
2,9
4,3
10,9
3,5
8,2
1,7
1,5
3,16
9,0
8,8
8,2
9,3
# -*- coding: utf-8 -*-
# following this tutorial: https://towardsdatascience.com/linear-regression-using-python-ce21aa90ade6
# simple linear regression, trying to predict a continuous variable
import pandas as pd
import numpy as np
import tkinter  # not used directly in this script
import matplotlib.pyplot as plt  # data visualisation libraries
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# import data
test_data = pd.read_csv('creature_data_nrs.csv')
# look at data
# test_data.head()
# test_data.info()
# test_data.describe()
# test_data.columns
# pairplot of all variable pairs for a quick overview of the data
#graph1 = sns.pairplot(test_data)
#plt.show()
# histogram to see the distribution of the target variable
# (sns.distplot is deprecated in newer seaborn versions; sns.histplot is the replacement)
# graph2 = sns.distplot(test_data['VALUEADJ'])
# plt.show()
# find the correlation between variables in dataset
#correlation = test_data.corr()
# Create heatmap
# Darker cells indicate a weak or negative correlation between the two variables;
# lighter cells indicate a stronger positive correlation.
# graph3 = sns.heatmap(correlation)
# plt.show()
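# Optional (not from the tutorial): with only two variables, the single
# correlation value can also be printed directly instead of reading it off the heatmap:
# print(test_data['DISTANCE'].corr(test_data['VALUEADJ']))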
# Separate the feature (independent variable: distance) from the target (predicted variable: the adjective's positivity rating)
X = test_data[['DISTANCE']]
y = test_data[['VALUEADJ']]
# Split data into train & test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=101)
# define model Linear Regression
model = LinearRegression()
# train model with the training data
model.fit(X_train,y_train)
# predict on the held-out test data
predictions = model.predict(X_test)
# visualise predictions
# graph4 = plt.scatter(y_test,predictions)
# plt.show()
# Coefficient of determination R^2 of the model, computed here on the full dataset (training and test points combined).
scores = model.score(X, y)
print(scores)
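# Optional extra check (not from the tutorial): score the held-out test split as well,
# since model.score(X, y) above is computed on the full dataset and mixes training and test points.
from sklearn.metrics import mean_squared_error, r2_score  # could also sit with the imports at the top
print('Test R^2:', r2_score(y_test, predictions))
print('Test MSE:', mean_squared_error(y_test, predictions))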
# Plot outputs
plt.scatter(X_test, y_test, color='black')
plt.plot(X_test, predictions, color='blue', linewidth=3)
plt.xticks(())  # hide x-axis tick marks and labels
plt.yticks(())  # hide y-axis tick marks and labels
plt.show()
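# Optional (not from the tutorial): the fitted regression line can be inspected
# via the parameters learned by the model.
print('intercept:', model.intercept_)
print('slope for DISTANCE:', model.coef_)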