var util = require('./module/Util')
// Registry of word lists keyed by language code ('en', 'fr', 'ru').
var wordDictionary = [];
wordDictionary.en = require('../dictionary/default.json');

// Optional dictionaries: each require() stays inside its own try block so a
// failure to load one (e.g. the package is not installed) leaves only that
// language unset.
try {
  wordDictionary.fr = require('french-badwords-list').array;
} catch (e) {
  // loading failed; 'fr' stays unset
}
try {
  wordDictionary.ru = require('russian-bad-words').flatWords;
} catch (e) {
  // loading failed; 'ru' stays unset
}

// Active word list used by the filter; starts as util.clone() of the 'en' list.
var words = util.clone(wordDictionary.en);
/**
* LeoProfanity
*
* @constructor
*/
var LeoProfanity = {
  /**
   * Remove a single word from the active list (no-op when it is absent).
   *
   * @private
   * @param {string} str - word
   * @returns {Object} this filter, for chaining
   */
  removeWord: function (str) {
    var index = words.indexOf(str);
    if (index !== -1) {
      words.splice(index, 1);
    }
    return this;
  },

  /**
   * Add a single word to the active list unless it is already present.
   *
   * @private
   * @param {string} str - word
   * @returns {Object} this filter, for chaining
   */
  addWord: function (str) {
    if (words.indexOf(str) === -1) {
      words.push(str);
    }
    return this;
  },

  /**
   * Build a replacement word by repeating `key` `n` times.
   * Returns an empty string when `n` is zero or negative.
   *
   * @example
   * // output: '***'
   * getReplacementWord('*', 3)
   *
   * // output: '----'
   * getReplacementWord('-', 4)
   *
   * @private
   * @param {string} key
   * @param {number} n
   * @returns {string}
   */
  getReplacementWord: function (key, n) {
    // A manual loop (rather than String.prototype.repeat) keeps negative
    // counts harmless: proceed() passes a negative n when nbLetters exceeds
    // the word length.
    var replacementWord = '';
    for (var i = 0; i < n; i++) {
      replacementWord += key;
    }
    return replacementWord;
  },

  /**
   * Sanitize string for this project
   * 1. Convert to lower case
   * 2. Replace comma and dot with space
   *
   * @private
   * @param {string} str
   * @returns {string}
   */
  sanitize: function (str) {
    str = str.toLowerCase();
    // NOTE(review): a block-style eslint-disable without a matching
    // eslint-enable applies to the rest of the file - confirm that is intended.
    /* eslint-disable */
    str = str.replace(/\.|,/g, ' ');
    return str;
  },

  /**
   * Return all current profanity words.
   * The returned array is the live internal list, not a copy.
   *
   * @example
   * filter.list();
   *
   * @public
   * @returns {Array.<string>}
   */
  list: function () {
    return words;
  },

  /**
   * Check whether the string contains at least one profanity word.
   * The text is sanitized, split on whitespace and every token is compared
   * against the active word list (whole-token match only).
   * See the "clean" method for more cases.
   *
   * @example
   * // output: true
   * filter.check('I have boob');
   *
   * @see http://stackoverflow.com/questions/26425637/javascript-split-string-with-white-space
   * @see http://stackoverflow.com/questions/6116474/how-to-find-if-an-array-contains-a-specific-string-in-javascript-jquery
   * @see http://stackoverflow.com/questions/9141951/splitting-string-by-whitespace-without-empty-elements
   * @public
   * @param {string} str
   * @returns {boolean}
   */
  check: function (str) {
    if (!str) return false;

    // Tokenise on any whitespace so the result agrees with proceed()/clean(),
    // which split on \s. match() returns null when there is no token
    // (e.g. for "."), hence the fallback to an empty array.
    var tokens = this.sanitize(str).match(/\S+/g) || [];
    if (tokens.length === 0) return false;

    // One Set lookup per dictionary word instead of scanning the token array
    // for every word.
    var tokenSet = new Set(tokens);
    return words.some(function (word) {
      return tokenSet.has(word);
    });
  },

  /**
   * Internal processing method shared by "clean" and "badWordsUsed".
   *
   * The sanitized text and the original text are split into token arrays
   * that line up index by index (comma and dot become whitespace in the
   * sanitized copy and are delimiters in the original), so a match found in
   * the sanitized tokens can be replaced in the original ones.
   *
   * @todo improve algorithm
   * @see http://stackoverflow.com/questions/26425637/javascript-split-string-with-white-space
   * @private
   * @param {string} str
   * @param {string} [replaceKey=*] one character only
   * @param {number} [nbLetters=0] number of leading letters left untouched
   * @returns {Array|string} `[cleanedString, badWords]`, where badWords holds
   *   the matched words as they appear in the input; an empty string when
   *   `str` is falsy
   */
  proceed: function (str, replaceKey, nbLetters) {
    if (!str) return '';
    if (typeof replaceKey === 'undefined') replaceKey = '*';
    if (typeof nbLetters === 'undefined') nbLetters = 0;

    var self = this;
    // split by whitespace (keep delimiter)
    // (comma and dot were already replaced by whitespace)
    var sanitizedTokens = this.sanitize(str).split(/(\s)/);
    // split by whitespace, comma and dot (keep delimiter)
    var resultTokens = str.split(/(\s|,|\.)/);
    // Built once per call so each token costs a single lookup.
    var wordSet = new Set(words);
    var badWords = [];

    sanitizedTokens.forEach(function (token, index) {
      if (!wordSet.has(token)) return;
      // Work from the original token so the letters that stay visible keep
      // the casing the caller supplied.
      var original = resultTokens[index];
      badWords.push(original);
      resultTokens[index] = original.slice(0, nbLetters) +
        self.getReplacementWord(replaceKey, original.length - nbLetters);
    });

    return [resultTokens.join(''), badWords];
  },

  /**
   * Replace profanity words
   *
   * @example
   * // no bad word
   * // output: I have 2 eyes
   * filter.clean('I have 2 eyes');
   *
   * // normal case
   * // output: I have ****, etc.
   * filter.clean('I have boob, etc.');
   *
   * // case insensitive
   * // output: I have ****
   * filter.clean('I have BoOb');
   *
   * // separated by comma and dot
   * // output: I have ****.
   * filter.clean('I have BoOb.');
   *
   * // multi occurrence
   * // output: I have ****,****, ***, and etc.
   * filter.clean('I have boob,boob, ass, and etc.');
   *
   * // should not detect unspaced-word
   * // output: Buy classic watches online
   * filter.clean('Buy classic watches online');
   *
   * // clean with custom replacement-character
   * // output: I have ++++
   * filter.clean('I have boob', '+');
   *
   * // support "clear letter" in the beginning of the word
   * // output: I have bo++
   * filter.clean('I have boob', '+', 2);
   *
   * @public
   * @param {string} str
   * @param {string} [replaceKey=*] one character only
   * @param {number} [nbLetters=0] number of leading letters left untouched
   * @returns {string}
   */
  clean: function (str, replaceKey, nbLetters) {
    if (!str) return '';
    // proceed() applies the defaults for replaceKey and nbLetters.
    return this.proceed(str, replaceKey, nbLetters)[0];
  },

  /**
   * Get list of used bad/profanity words
   *
   * @example
   * // should return an empty list if string does not contain profanity word
   * // output: []
   * filter.badWordsUsed('I have 2 eyes')
   *
   * // should find profanity word
   * // output: ['zoophilia']
   * filter.badWordsUsed('lorem zoophilia ipsum')
   *
   * // should detect regardless of letter case
   * // output: ['BoOb']
   * filter.badWordsUsed('I have BoOb')
   *
   * // should detect multi occurrence
   * // output: ['boob', 'boob', 'ass']
   * filter.badWordsUsed('I have boob,boob, ass, and etc.')
   *
   * // should not detect unspaced-word
   * // output: []
   * filter.badWordsUsed('Buy classic watches online')
   *
   * // should detect multi-length-space and multi-space
   * // output: ['BoOb']
   * filter.badWordsUsed(',I h a. v e BoOb.')
   *
   * @public
   * @param {string} str
   * @returns {Array.<string>}
   */
  badWordsUsed: function (str) {
    if (!str) return [];
    return this.proceed(str, '*')[1];
  },

  /**
   * Add word to the list
   *
   * @example
   * // add word
   * filter.add('b00b');
   *
   * // add word's array
   * // check duplication automatically
   * filter.add(['b00b', 'b@@b']);
   *
   * @public
   * @param {string|Array.<string>} data - anything else (including
   *   null/undefined) is ignored
   * @returns {Object} this filter, for chaining
   */
  add: function (data) {
    var self = this;
    if (typeof data === 'string') {
      self.addWord(data);
    } else if (Array.isArray(data)) {
      // Array.isArray is safe for null/undefined, unlike reading
      // data.constructor.
      data.forEach(function (word) {
        self.addWord(word);
      });
    }
    return this;
  },

  /**
   * Remove word from the list
   *
   * @example
   * // remove word
   * filter.remove('b00b');
   *
   * // remove word's array
   * filter.remove(['b00b', 'b@@b']);
   *
   * @public
   * @param {string|Array.<string>} data - anything else (including
   *   null/undefined) is ignored
   * @returns {Object} this filter, for chaining
   */
  remove: function (data) {
    var self = this;
    if (typeof data === 'string') {
      self.removeWord(data);
    } else if (Array.isArray(data)) {
      data.forEach(function (word) {
        self.removeWord(word);
      });
    }
    return this;
  },

  /**
   * Reset word list by using en dictionary
   * (also removes words that were added manually)
   *
   * @public
   * @returns {Object} this filter, for chaining
   */
  reset: function () {
    this.loadDictionary('en');
    return this;
  },

  /**
   * Clear all words in the list
   *
   * @public
   * @returns {Object} this filter, for chaining
   */
  clearList: function () {
    words = [];
    return this;
  },

  /**
   * Return word list from dictionary.
   * Falls back to the en dictionary when `name` does not refer to a
   * registered word list.
   *
   * @example
   * // returns words in en dictionary
   * filter.getDictionary();
   *
   * // returns words in fr dictionary
   * filter.getDictionary('fr');
   *
   * @public
   * @param {string} [name=en] dictionary name
   * @returns {Array.<string>}
   */
  getDictionary: function (name = 'en') {
    // The registry is an Array used as a map, so a plain `in` test would also
    // accept inherited names such as 'toString', and 'length' is even an own
    // property. Require an own entry that actually holds a word list.
    var isOwn = Object.prototype.hasOwnProperty.call(wordDictionary, name);
    var dictionary = isOwn ? wordDictionary[name] : undefined;
    return Array.isArray(dictionary) ? dictionary : wordDictionary['en'];
  },

  /**
   * Add dictionary
   *
   * Not implemented yet: currently a no-op that ignores both arguments.
   *
   * @todo complete it
   * @private
   * @param {string} name
   * @param {Array.<string>} data
   */
  addDictionary: function (name, data) {
  },

  /**
   * Load word list from dictionary to use in the filter.
   * The active list is replaced by util.clone() of the chosen dictionary.
   *
   * @example
   * // replace current dictionary with the french one
   * filter.loadDictionary('fr');
   *
   * // replace dictionary with the default one (same as filter.reset())
   * filter.loadDictionary();
   *
   * @public
   * @param {string} [name=en]
   * @returns {Object} this filter, for chaining
   */
  loadDictionary: function (name = 'en') {
    words = util.clone(this.getDictionary(name));
    return this;
  },
};
// Expose the LeoProfanity object as this module's CommonJS export.
module.exports = LeoProfanity;