核心代码
import * as fs from 'fs';
import * as path from 'path';
import * as readline from 'readline';
/**
 * Sensitive-word filter backed by plain-text word lists.
 *
 * Every `.txt` file found in the `libs` directory next to this module is read
 * as a word list with one keyword per line (an optional trailing comma is
 * stripped). {@link BadWords.filter} replaces each occurrence of a keyword with
 * asterisks, ignoring case, while leaving Markdown image tags untouched.
 */
export default class BadWords {
  /** Lazily created shared instance, exposed through the `instance` getter. */
  private static _instance: BadWords;

  /**
   * Matches a single Markdown image tag `![alt](url)`.
   * The negated character classes keep the match from running across several
   * images on one line, and `*` also accepts an empty alt text or URL.
   * The capture group makes `String.prototype.split` keep the tags in its output.
   */
  private static readonly IMAGE_PATTERN = /(!\[[^\]]*\]\([^)]*\))/;

  /** Keywords to mask, loaded once by the constructor. */
  private data: Array<string> = [];

  /**
   * Loads all word lists synchronously so the filter is fully populated as
   * soon as the instance exists.
   *
   * @throws The underlying `fs` error if the `libs` directory or one of its
   *         `.txt` files cannot be read.
   */
  constructor () {
    const libDir = path.resolve(__dirname, './libs/');
    // A Set drops keywords that appear in more than one word list.
    const words = new Set<string>();
    for (const file of fs.readdirSync(libDir)) {
      if (path.extname(file) !== '.txt') {
        continue;
      }
      const text = fs.readFileSync(path.resolve(libDir, file), 'utf8');
      for (const rawLine of text.split(/\r?\n/)) {
        // Drop a UTF-8 byte-order mark and surrounding whitespace.
        let word = rawLine.replace(/^\uFEFF/, '').trim();
        // Some lists terminate every entry with a comma.
        if (word.endsWith(',')) {
          word = word.slice(0, -1).trim();
        }
        if (word) {
          words.add(word);
        }
      }
    }
    this.data = Array.from(words);
  }

  /** The shared filter instance; it is created on first access. */
  public static get instance (): BadWords {
    if (!BadWords._instance) {
      BadWords._instance = new BadWords();
    }
    return BadWords._instance;
  }

  /**
   * Masks every keyword occurrence in `content` with asterisks.
   *
   * @param content Text to filter; Markdown image tags inside it are preserved verbatim.
   * @returns The filtered text, with each match replaced by as many `*` as the keyword has characters.
   */
  filter (content: string): string {
    // Splitting on a pattern with a capture group yields plain text at even
    // indexes and image tags at odd indexes, so images never pass through
    // the keyword matcher and no placeholder token is needed.
    return content
      .split(BadWords.IMAGE_PATTERN)
      .map((segment, index) => (index % 2 === 1 ? segment : this.maskKeywords(segment)))
      .join('');
  }

  /** Replaces every case-insensitive keyword occurrence in `text` with asterisks. */
  private maskKeywords (text: string): string {
    if (!text) {
      return text;
    }
    let result = text;
    let upper = result.toUpperCase();
    for (const keyword of this.data) {
      // Cheap substring test first, so a RegExp is only built for keywords that occur.
      if (!keyword || !upper.includes(keyword.toUpperCase())) {
        continue;
      }
      result = result.replace(
        new RegExp(BadWords.escapeRegExp(keyword), 'gi'),
        '*'.repeat(keyword.length)
      );
      upper = result.toUpperCase();
    }
    return result;
  }

  /** Escapes regular-expression metacharacters so `text` is matched literally. */
  private static escapeRegExp (text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }
}
在需要使用的地方引入
import BadWords from '../../common/badWords/index';
// Obtain the shared BadWords instance (it is created on the first access of `instance`).
const badWords = BadWords.instance;
// Returns the text with every matched keyword replaced by asterisks.
// `userName` is assumed to be a string supplied by the calling code.
badWords.filter(userName)
详细资源可点击下方链接下载
`libs` 目录下的每个 `txt` 文件代表一类敏感词词库,词库数据来源:
https://github.com/fwwdn/sensitive-stop-words
https://github.com/fighting41love/funNLP/tree/master/data/%E6%95%8F%E6%84%9F%E8%AF%8D%E5%BA%93