"use strict";
const fs = require("fs");
/**
 * Pick the next character of a word by sampling the probability tree.
 *
 * The tree is descended from the most recent character of `word` backwards,
 * one level per character, until a level whose values are numbers is reached.
 * A character is then drawn from that level with a single `Math.random()` call.
 * @memberOf module:index
 * @private
 * @param {Map} map - A map of probabilities.
 * @param {String[]} word - The array of preceding characters of the current word. It is left untouched.
 * @return {String} The next character.
 * @throws {TypeError} If the preceding characters do not lead to a level holding probabilities.
 */
function findChar(map, word) {
  let level = map;
  let index = word.length;
  // Read the context by index instead of splicing, so the caller's array survives the call.
  do {
    index -= 1;
    level = index >= 0 ? level.get(`${word[index]}`) : undefined;
    if (!(level instanceof Map)) {
      throw new TypeError(`No probabilities recorded for the character chain "${word.join("")}".`);
    }
  } while (typeof level.values().next().value !== "number");

  const rand = Math.random();
  let acc = 0;
  let lastKey;
  for (const [key, val] of level) {
    acc += val;
    if (rand <= acc) {
      return key;
    }
    lastKey = key;
  }
  // Rounding can leave the cumulative sum a hair below the drawn number;
  // the last character owns that remaining sliver of probability.
  return lastKey;
}
/**
 * Rescale the probability tree in place so that every level holding numbers sums to 1.
 * Raw occurrence counts (integers) are turned into fractions between 0 and 1 (floats),
 * each one being the count divided by the total of its own level.
 * A level is treated as holding numbers when its first value is a number;
 * otherwise every value of the level is normalized recursively.
 * @memberOf module:index
 * @private
 * @param {Map} map - The map of probabilities.
 */
function normalize(map) {
  const firstValue = map.values().next().value;
  if (typeof firstValue != "number") {
    // Branch level (or empty map): descend into each child.
    for (const key of map.keys()) {
      normalize(map.get(`${key}`));
    }
    return;
  }
  let total = 0;
  for (const count of map.values()) {
    total = total + count;
  }
  for (const [key, count] of map) {
    map.set(`${key}`, count / total);
  }
}
/**
 * Count one more occurrence of a character after a given chain of characters.
 * The chain is consumed from its last element to its first, descending one level
 * of the tree per element; the counter of `char` is then created at 0 if missing
 * and incremented by 1 on the level that was reached.
 * @memberOf module:index
 * @private
 * @param {String[]} arr - The array of characters preceding the current character. It is emptied by the call.
 * @param {Map} map - The map of probabilities.
 * @param {String} char - The character to increment.
 */
function incStat(arr, map, char) {
  let level = map;
  do {
    // Take the last remaining character of the chain as the next key.
    const key = arr.splice(-1, 1).join("");
    level = level.get(key);
  } while (arr.length > 0);

  const target = `${char}`;
  const count = level.has(target) ? level.get(target) : 0;
  level.set(target, count + 1);
}
/**
 * Make sure the tree of probabilities has a branch for the substring passed in.
 * The substring is consumed from its last character to its first; each character
 * selects a nested map one level deeper, which is created empty when it is missing.
 * @memberOf module:index
 * @private
 * @param {String[]} arr - The array of characters from the substring. It is emptied by the call.
 * @param {Map} map - A map of probabilities.
 * @param {Set} chars - The set of all used chars. Not read by this function.
 */
function genStats(arr, map, chars) {
  let level = map;
  while (arr.length > 0) {
    const key = arr.splice(-1, 1).join("");
    if (!level.has(key)) {
      level.set(key, new Map());
    }
    level = level.get(key);
  }
}
/**
 * Module exporting the {@link module:index~Generator Generator} class.
 * @module
 * @return {Generator} {@link module:index~Generator Generator} class.
 */
class Generator {
  /**
   * Create a generator instance.
   * @param {number} [markovLen=2] - The length of the Markov chain.
   * This parameter defines the depth of the probability chain (2 would mean that a character will be chosen depending on the 2 preceding characters).
   * 2 to 3 are usually fine values.
   * Higher values will give you less variety in the generated words since they would look just like the words of the original list.
   * Avoid high values unless you turn config.allowExist on.
   * @param {object} [config] - The configuration of the generator.
   * @param {String} [config.wordStart="!"] - The default character at the beginning of a word when analyzing/generating.
   * Use one that is not in any word of the original list.
   * This character is used internally, please do not add it in the original word list that you pass in (good word: "doctor", NOT good: "!doctor?").
   * It's just meant to mark the starting point of a word.
   * Note: If this character ("!") may appear in your original list, change it to something that doesn't. Ex.: "£" or "$".
   * @param {String} [config.wordEnd="?"] - The default character at the end of a word when analyzing/generating.
   * Use one that is not in any word of the original list (as above).
   * @param {number} [config.minLength=1] - The minimum length of a word.
   * @param {number} [config.maxLength=20] - The maximum length of a word.
   * @param {number} [config.timeout=1000] - The timeout (in ms) of the {@link module:index~Generator#genWord genWord} and {@link module:index~Generator#genSet genSet} methods, just in case of huge list generation.
   * Tune it when you generate huge lists, or if you use "Infinity" as a parameter in the {@link module:index~Generator#genSet genSet} method.
   * @param {boolean} [config.allowExist=false] - Allow to add existing words (from the original word list).
   */
  constructor(markovLen = 2, config = {}) {
    const defaults = {
      wordStart: "!",
      wordEnd: "?",
      minLength: 1,
      maxLength: 20,
      timeout: 1000,
      allowExist: false
    };
    this.config = Object.assign(defaults, config);
    this.markovLen = markovLen;
    this.stats = new Map();
    this.originWords = new Set();
  }

  /**
   * Analyse the word list passed in.
   * The use of this method is mandatory before using the {@link module:index~Generator#genWord genWord} or {@link module:index~Generator#genSet genSet} methods since you have to compute the probabilities of each character to appear before being able to generate words.
   * The statistics are normalized to fractions at the end of the call, so a later call adds whole-number counts on top of those fractions: analyse the complete word list in one single call.
   * @param {Set} words - The word list (anything exposing `forEach`, such as a Set or an Array).
   * @example
   * generator.analyze(new Set(["home", "coding", "generator"]));
   */
  analyze(words) {
    const { wordStart, wordEnd } = this.config;
    const wrapped = new Set();
    words.forEach((word) => {
      this.originWords.add(word);
      wrapped.add(`${wordStart}${word}${wordEnd}`);
    });
    // Split every word by code point once, so that both passes below index the
    // very same characters (string indexes would cut surrogate pairs in half).
    const sequences = [...wrapped].map((word) => [...word]);

    // First pass: collect every chain of up to markovLen characters ending at each position.
    const alphabet = new Set();
    const contexts = new Set();
    for (const chars of sequences) {
      chars.forEach((char, c) => {
        alphabet.add(char);
        const start = Math.max(c - this.markovLen + 1, 0);
        contexts.add(chars.slice(start, c + 1).join(""));
      });
    }
    for (const context of contexts) {
      genStats([...context], this.stats, alphabet);
    }

    // Second pass: count each character against the (up to markovLen) characters preceding it.
    for (const chars of sequences) {
      for (let c = 1; c < chars.length; c++) {
        const preceding = chars.slice(Math.max(c - this.markovLen, 0), c);
        incStat(preceding, this.stats, chars[c]);
      }
    }
    normalize(this.stats);
  }

  /**
   * Generate a word.
   * Candidates are drawn until one satisfies config.minLength, config.maxLength and config.allowExist, or until config.timeout has elapsed.
   * @return {(String|undefined)} The word generated, or `undefined` when no acceptable word was found before the timeout.
   * @example
   * generator.genWord();
   * // returns: "generator"
   */
  genWord() {
    const { wordStart, wordEnd, minLength, maxLength, timeout, allowExist } = this.config;
    const deadline = Date.now() + timeout;
    // Escape the markers so that any character (a digit, a letter, "$", "."...) is matched literally.
    const escape = (text) => `${text}`.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const markers = new RegExp(`${escape(wordStart)}|${escape(wordEnd)}`, "g");
    while (Date.now() <= deadline) {
      const chars = [wordStart];
      // Stop drawing as soon as the candidate is longer than maxLength: it would be rejected anyway,
      // and a chain that never reaches wordEnd must not keep this loop running.
      while (chars[chars.length - 1] !== wordEnd && chars.length - 1 <= maxLength) {
        // Hand over a copy: the lookup helper is free to consume the array it receives.
        const next = findChar(this.stats, chars.slice(0));
        if (next === undefined) {
          break;
        }
        chars.push(next);
      }
      if (chars[chars.length - 1] !== wordEnd) {
        continue;
      }
      const word = chars.join("").replace(markers, "");
      const lengthOk = word.length >= minLength && word.length <= maxLength;
      if (lengthOk && (allowExist || !this.originWords.has(word))) {
        return word;
      }
    }
    return undefined;
  }

  /**
   * Generate a word list.
   * The list may hold fewer than `nb` words when config.timeout elapses first.
   * @param {(number|Infinity)} [nb=10] - Length of the word list to generate.
   * @return {Set} The word list.
   * @example
   * generator.genSet(3);
   * // returns: Set { "home", "coding", "generator" }
   */
  genSet(nb = 10) {
    const deadline = Date.now() + this.config.timeout;
    const words = new Set();
    while ((words.size < nb) && (Date.now() <= deadline)) {
      const word = this.genWord();
      // genWord yields undefined when it times out; that is not a word.
      if (word !== undefined) {
        words.add(word);
      }
    }
    return words;
  }

  /**
   * Save the map of probabilities.
   * Every Map of the tree is serialized as `{ _type: "map", map: [[key, value], ...] }`.
   * On a write error the error is logged and the callback is not called.
   * @param {String} path - Path of the savefile to write.
   * @param {function(data)} [cb] - Callback after save. "data" is the json result of the probabilities map (the content of the savefile).
   */
  saveStats(path, cb = null) {
    const replacer = (key, val) => {
      if (val instanceof Map) {
        return {
          _type: "map",
          map: [...val]
        };
      }
      return val;
    };
    const data = JSON.stringify(this.stats, replacer);
    fs.writeFile(path, data, (err) => {
      if (err) {
        console.log(err);
        return;
      }
      if (cb) {
        cb(data);
      }
    });
  }

  /**
   * Load the map of probabilities.
   * Only the probabilities are restored; the list of original words is not part of the savefile.
   * On a read error or an unparsable savefile the error is logged, the current probabilities are kept and the callback is not called.
   * @param {String} path - Path of the savefile to load.
   * @param {function()} [cb] - Callback after savefile has been loaded.
   */
  loadStats(path, cb = null) {
    const reviver = (key, val) => {
      if (val !== null && typeof val === "object" && val._type === "map") {
        return new Map(val.map);
      }
      return val;
    };
    fs.readFile(path, (err, data) => {
      if (err) {
        console.log(err);
        return;
      }
      let stats;
      try {
        stats = JSON.parse(data, reviver);
      } catch (parseErr) {
        // A throw from inside this callback could not be caught by any caller.
        console.log(parseErr);
        return;
      }
      this.stats = stats;
      if (cb) {
        cb();
      }
    });
  }
}
// CommonJS export: the Generator class itself is the value of the module.
module.exports = exports = Generator;