Commit 5af05e84 by ISABEL\MBrunet

Add node WIP code from tf-idf workshop

parent e763aa15
node_modules
outputWords.json
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
const fs = require("fs");
/**
 * Reads a file as UTF-8 text, locating it through Node's module resolution.
 *
 * @param {string} path - Path handed to require.resolve.
 * @param {function(Error, string=)} callback - Node-style callback; it is
 *   invoked with the error if resolving or starting the read throws,
 *   otherwise it is passed straight to fs.readFile.
 */
function readModuleFile(path, callback) {
  let resolvedPath;
  try {
    resolvedPath = require.resolve(path);
    // Kept inside the try so a synchronous throw from readFile is reported
    // through the callback as well.
    fs.readFile(resolvedPath, "utf8", callback);
  } catch (error) {
    callback(error);
  }
}
// Common English stop words that are excluded from the frequency table.
// Held in a Set, built once, so each lookup does not scan the whole list.
const STOP_WORDS = new Set([
  "i", "me", "my", "myself", "we", "our", "ours", "ourselves",
  "you", "your", "yours", "yourself", "yourselves",
  "he", "him", "his", "himself", "she", "her", "hers", "herself",
  "it", "its", "itself", "they", "them", "their", "theirs", "themselves",
  "what", "which", "who", "whom", "this", "that", "these", "those",
  "am", "is", "are", "was", "were", "be", "been", "being",
  "have", "has", "had", "having", "do", "does", "did", "doing",
  "a", "an", "the", "and", "but", "if", "or", "because", "as",
  "until", "while", "of", "at", "by", "for", "with", "about", "against",
  "between", "into", "through", "during", "before", "after", "above",
  "below", "to", "from", "up", "down", "in", "out", "on", "off", "over",
  "under", "again", "further", "then", "once", "here", "there", "when",
  "where", "why", "how", "all", "any", "both", "each", "few", "more",
  "most", "other", "some", "such", "no", "nor", "not", "only", "own",
  "same", "so", "than", "too", "very", "s", "t", "can", "will", "just",
  "don", "should", "now"
]);

/**
 * Counts how often each word occurs in a text, ignoring case.
 *
 * A token is a run of word characters (letters, digits, underscore).
 * Tokens of one or two characters and stop words are skipped. Only words
 * that occur at least `minCount` times are kept in the result.
 *
 * @param {string} str - Text to analyse.
 * @param {number} [minCount=30] - Minimum number of occurrences for a word
 *   to be included in the result.
 * @returns {Object<string, number>} Lower-cased words mapped to their counts.
 * @throws {TypeError} If `str` is not a string.
 */
function getFrequency(str, minCount = 30) {
  if (typeof str !== "string") {
    throw new TypeError("getFrequency expects a string");
  }

  // Match word runs directly; splitting on \b also yields punctuation and
  // whitespace runs, which would be counted when longer than two characters.
  const tokens = str.match(/\w+/g) || [];

  // A Map keeps the tally clear of inherited object properties such as
  // "constructor" while counting.
  const tally = new Map();
  tokens.forEach(token => {
    if (token.length <= 2) {
      return;
    }
    const word = token.toLowerCase();
    if (STOP_WORDS.has(word)) {
      return;
    }
    // Read and write under the same lower-cased key so capitalised
    // occurrences add to the count instead of resetting it.
    tally.set(word, (tally.get(word) || 0) + 1);
  });

  const wordCounts = {};
  tally.forEach((count, word) => {
    if (count >= minCount) {
      wordCounts[word] = count;
    }
  });
  return wordCounts;
}
// Entry point: read the book text, compute its word frequencies and save
// them as JSON in ./outputWords.json.
readModuleFile("./books/pride-and-prejudice.txt", function(readErr, words) {
  if (readErr) {
    // Stop here: on a failed read `words` is undefined and getFrequency
    // would throw before anything could be written.
    return console.log(readErr);
  }
  fs.writeFile(
    "./outputWords.json",
    JSON.stringify(getFrequency(words)),
    function(writeErr) {
      if (writeErr) {
        return console.log(writeErr);
      }
      console.log("The file was saved!");
    }
  );
});
// Build a TF-IDF index over the three sample books, then print the measure
// reported for each document for the terms "marry" and "lady".
const natural = require("natural");
const TfIdf = natural.TfIdf;
const tfidf = new TfIdf();

[
  "books/heart-of-darkness.txt",
  "books/pride-and-prejudice.txt",
  "books/the-importance-of-being-earnest.txt"
].forEach(function(bookPath) {
  // Pass only the path so forEach's extra arguments never reach addFileSync.
  tfidf.addFileSync(bookPath);
});

// Shared callback for tfidfs: logs one line per document index.
function reportMeasure(i, measure) {
  console.log("document #" + i + " is " + measure);
}

console.log("marry --------------------------------");
tfidf.tfidfs("marry", reportMeasure);

console.log("lady --------------------------------");
tfidf.tfidfs("lady", reportMeasure);
{
"name": "tf-idf",
"version": "1.0.0",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
"apparatus": {
"version": "0.0.9",
"resolved": "https://registry.npmjs.org/apparatus/-/apparatus-0.0.9.tgz",
"integrity": "sha1-N9zSWDStC2UQdllikduCPusZCL0=",
"requires": {
"sylvester": "0.0.21"
}
},
"bindings": {
"version": "1.3.0",
"resolved": "http://nexus.isabelteam.be/v3/repository/npm-group/bindings/-/bindings-1.3.0.tgz",
"integrity": "sha512-DpLh5EzMR2kzvX1KIlVC0VkC3iZtHKTgdtZ0a3pglBZdaQFjt5S9g9xd1lE+YvXyfd6mtCeRnrUfOLYiTMlNSw==",
"optional": true
},
"nan": {
"version": "2.10.0",
"resolved": "http://nexus.isabelteam.be/v3/repository/npm-group/nan/-/nan-2.10.0.tgz",
"integrity": "sha512-bAdJv7fBLhWC+/Bls0Oza+mvTaNQtP+1RyhhhvD95pgUJz6XM5IzgmxOkItJ9tkoCiplvAnXI1tNmmUD/eScyA==",
"optional": true
},
"natural": {
"version": "0.5.6",
"resolved": "https://registry.npmjs.org/natural/-/natural-0.5.6.tgz",
"integrity": "sha1-8hUrYfNr78b+Dta8gR+0mebp3M0=",
"requires": {
"apparatus": "0.0.9",
"sylvester": "0.0.21",
"underscore": "1.9.0",
"webworker-threads": "0.7.13"
}
},
"sylvester": {
"version": "0.0.21",
"resolved": "https://registry.npmjs.org/sylvester/-/sylvester-0.0.21.tgz",
"integrity": "sha1-KYexzivS84sNzio0OIiEv6RADqc="
},
"underscore": {
"version": "1.9.0",
"resolved": "https://registry.npmjs.org/underscore/-/underscore-1.9.0.tgz",
"integrity": "sha512-4IV1DSSxC1QK48j9ONFK1MoIAKKkbE8i7u55w2R6IqBqbT7A/iG7aZBCR2Bi8piF0Uz+i/MG1aeqLwl/5vqF+A=="
},
"webworker-threads": {
"version": "0.7.13",
"resolved": "http://nexus.isabelteam.be/v3/repository/npm-group/webworker-threads/-/webworker-threads-0.7.13.tgz",
"integrity": "sha1-yEsYtrokElu503NC5E3rgVFi+4M=",
"optional": true,
"requires": {
"bindings": "1.3.0",
"nan": "2.10.0"
}
}
}
}
{
"name": "tf-idf",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"natural": "^0.5.6"
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment