Node.js 实现敏感词过滤及敏感词库(基于数组)

核心代码 

import * as fs from 'fs';
import * as path from 'path';
import * as readline from 'readline';

/**
 * Sensitive-word filter backed by plain-text dictionaries.
 *
 * Every `.txt` file in the `libs` directory next to this module is read as a
 * dictionary with one keyword per line (a single trailing comma on a line is
 * ignored). Matching keywords in user content are replaced with asterisks.
 *
 * Use the shared object via `BadWords.instance` so the dictionaries are
 * read only once.
 */
export default class BadWords {
    private static _instance: BadWords;
    private data: Array<string> = [];

    /**
     * Loads all dictionaries synchronously so that the keyword list is
     * complete as soon as the constructor returns; a stream-based load would
     * leave the list empty for any `filter` call made right after creation.
     *
     * @throws If the `libs` directory or one of its `.txt` files cannot be read.
     */
    constructor () {
      const libDir = path.resolve(__dirname, './libs/');
      for (const file of fs.readdirSync(libDir)) {
        if (path.extname(file) !== '.txt') {
          continue;
        }
        // Drop a leading byte-order mark so it does not become part of the first keyword.
        const text = fs.readFileSync(path.resolve(libDir, file), 'utf8').replace(/^\uFEFF/, '');
        for (const rawLine of text.split(/\r\n|[\n\r\u2028\u2029]/)) {
          const keyword = rawLine.endsWith(',') ? rawLine.slice(0, -1) : rawLine;
          // Skip blank lines and lines that held nothing but a comma.
          if (keyword) {
            this.data.push(keyword);
          }
        }
      }
    }

    /** Lazily created shared instance. */
    public static get instance (): BadWords {
      if (!BadWords._instance) {
        BadWords._instance = new BadWords();
      }
      return BadWords._instance;
    }

    /**
     * Masks every dictionary keyword found in `content` (case-insensitive)
     * with `*` characters, one per matched character.
     *
     * Markdown images (`![alt](url)`) are passed through untouched so that
     * keywords never corrupt an image reference.
     *
     * @param content Text to filter.
     * @returns A filtered copy of `content`.
     */
    filter (content: string): string {
      // Splitting on a capturing group keeps the images in the result:
      // even indexes hold ordinary text, odd indexes hold the image markup.
      // The negated character classes stop each match at its own `]` / `)`,
      // so text between two images on one line is still filtered.
      const segments = content.split(/(!\[[^\]\n]*\]\([^)\n]*\))/);
      return segments
        .map((segment, index) => (index % 2 === 1 ? segment : this.maskKeywords(segment)))
        .join('');
    }

    /** Replaces every keyword occurrence in `text` with asterisks. */
    private maskKeywords (text: string): string {
      let result = text;
      for (const keyword of this.data) {
        // Cheap pre-check that avoids compiling a RegExp for keywords that are absent.
        if (!keyword || !result.toUpperCase().includes(keyword.toUpperCase())) {
          continue;
        }
        // Keywords are literal text: escape regex metacharacters so that
        // entries like "c++" or "a.c" neither throw nor over-match.
        const pattern = new RegExp(keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'gi');
        result = result.replace(pattern, match => '*'.repeat(match.length));
      }
      return result;
    }
}

在需要使用的地方引入

// Import the filter class; adjust the relative path to where the module lives in your project.
import BadWords from '../../common/badWords/index';
// `instance` creates one shared BadWords object on first access and returns the same object afterwards.
const badWords = BadWords.instance;

// `filter` returns the masked text as a new string, so use its return value.
badWords.filter(userName)

详细资源可点击下方链接下载

每个 `txt` 文件代表一种类型的敏感词词库，词库数据来源：  

https://github.com/fwwdn/sensitive-stop-words  

https://github.com/fighting41love/funNLP/tree/master/data/%E6%95%8F%E6%84%9F%E8%AF%8D%E5%BA%93