一、什么是敏感词过滤?

敏感词过滤是一种处理网络内容的技术,可以检测和过滤出网络中的敏感/违禁词汇。它通过给定的关键字或字符串,判断网络内容是否包含某些敏感信息,从而防止违反法律法规的信息流通。
通常,可以使用两种方法来过滤敏感词:

  1. 黑名单过滤:即定义一个黑名单,将所有敏感词记录在其中,然后对输入的文本进行对比,如果发现有敏感词,就将其过滤掉。
  2. 白名单过滤:即定义一个白名单,将所有不敏感的词汇记录在其中,然后对输入的文本进行对比,如果发现有不在白名单中的词汇,就将其过滤掉。

二、ToolGood.Words是什么?

ToolGood.Words是一款高性能非法词(敏感词)检测组件,附带繁体简体互换,支持全角半角互换,获取拼音首字母,获取拼音字母,拼音模糊搜索等功能。
ToolGood.Words的源码网站:https://github.com/toolgood/ToolGood.Words

三、在Visual Studio中安装ToolGood.Words

3.1、右键项目解决方案,选择“管理NuGet程序包”,如下图所示:

(截图:在项目右键菜单中选择“管理NuGet程序包”)

3.2、切换到“浏览”选项卡,搜索“ToolGood.Words”并安装:

(截图:在“浏览”选项卡中搜索并安装“ToolGood.Words”)


安装完之后最好重新编译生成项目

四、创建“subContentCheck”类

敏感/违禁词汇因特殊内容不便上传,可自行在网站上查找

using Microsoft.AspNetCore.DataProtection.KeyManagement;
using Microsoft.AspNetCore.Http;
using Microsoft.CodeAnalysis.Text;
using Newtonsoft.Json;
using System.Collections;
using System.Text;
using ToolGood.Words;
using static System.Net.Mime.MediaTypeNames;
using IHostingEnvironment = Microsoft.AspNetCore.Hosting.IHostingEnvironment;

namespace WebApplication1 //放在自己项目中时,需要更换为自己的命名空间
{
    /// <summary>
    /// Container for a list of illegal keywords.
    /// </summary>
    public class keywords
    {
        /// <summary>
        /// The illegal keywords. Initialised to an empty list so a freshly
        /// constructed instance never exposes <c>null</c>.
        /// </summary>
        public List<string> IllegalKeywords { get; set; } = new List<string>();
    }

    /// <summary>
    /// Container for a list of illegal URLs.
    /// </summary>
    public class urlwords
    {
        /// <summary>
        /// The illegal URLs. Initialised to an empty list so a freshly
        /// constructed instance never exposes <c>null</c>.
        /// </summary>
        public List<string> IllegalUrls { get; set; } = new List<string>();
    }

    /// <summary>
    /// 提交的内容敏感违禁词检查类
    /// </summary>
    public class subContentCheck
    {
        /// <summary>
        /// Hosting environment; its WebRootPath is prefixed to the word-list paths below.
        /// </summary>
        private IHostingEnvironment _hostingEnv;

        /// <summary>
        /// Path of the sensitive-word file, appended to the web root path.
        /// </summary>
        private string dictionaryPath = "/sensitiveWords/sensitiveWords.txt";

        /// <summary>
        /// Path of the file listing sensitive links, sites and URLs, appended to the web root path.
        /// </summary>
        private string urlsPath = "/sensitiveWords/IllegalUrls.txt";

        /// <summary>
        /// The loaded sensitive entries (keywords and URLs combined in one array).
        /// </summary>
        public string[] Words { get; set; }

        /// <summary>
        /// Creates the checker and immediately loads the word lists by calling <c>InitDictionary</c>.
        /// </summary>
        /// <param name="hostingEnv">Hosting environment whose <c>WebRootPath</c> is the base directory of the word-list files.</param>
        /// <exception cref="ArgumentNullException"><paramref name="hostingEnv"/> is <c>null</c>.</exception>
        public subContentCheck(IHostingEnvironment hostingEnv)
        {
            // Fail fast with a descriptive exception instead of a
            // NullReferenceException raised later inside InitDictionary.
            _hostingEnv = hostingEnv ?? throw new ArgumentNullException(nameof(hostingEnv));
            InitDictionary();
        }

        /// <summary>
        /// (Re)loads the in-memory sensitive-word list into <c>Words</c>.
        /// </summary>
        /// <remarks>
        /// Reads the keyword file and the URL file located under <c>WebRootPath</c> as UTF-8,
        /// splits every line on <c>'|'</c>, and stores the keyword entries followed by the URL
        /// entries. Entries are trimmed and blank entries are dropped, so stray whitespace
        /// around a separator never becomes a keyword of its own.
        /// Exceptions thrown by <c>System.IO.File.ReadAllLines</c> (for example when a file
        /// is missing) are not caught here; in that case <c>Words</c> is left empty.
        /// </remarks>
        public void InitDictionary()
        {
            // Reset first so a failed read leaves an empty list, never a stale one.
            Words = Array.Empty<string>();

            // Plain concatenation is intentional: the relative parts start with '/',
            // and Path.Combine would treat them as rooted and discard WebRootPath.
            string wordsPath = _hostingEnv.WebRootPath + dictionaryPath;
            string urlPath = _hostingEnv.WebRootPath + urlsPath;

            // Read both files before publishing anything to Words.
            List<string> keywordEntries = ReadEntries(wordsPath);
            List<string> urlEntries = ReadEntries(urlPath);

            var allEntries = new List<string>(keywordEntries.Count + urlEntries.Count);
            allEntries.AddRange(keywordEntries);
            allEntries.AddRange(urlEntries);
            Words = allEntries.ToArray();
        }

        /// <summary>
        /// Reads a UTF-8 text file and returns its trimmed, non-blank, '|'-separated entries in file order.
        /// </summary>
        /// <param name="path">Full path of the file to read.</param>
        private static List<string> ReadEntries(string path)
        {
            return System.IO.File.ReadAllLines(path, System.Text.Encoding.UTF8)
                .SelectMany(fileLine => fileLine.Split('|'))
                .Select(entry => entry.Trim())
                .Where(entry => entry.Length > 0)
                .ToList();
        }

        /// <summary>
        /// Replaces every sensitive word found in the text with a mask character.
        /// </summary>
        /// <param name="sourceText">Text to filter.</param>
        /// <param name="replaceChar">Character written over matched words; defaults to '*'.</param>
        /// <returns>
        /// The filtered text on success; otherwise a status code:
        /// "0" when loading the keywords into the searcher threw,
        /// "1" when the replacement itself threw,
        /// "2" when <paramref name="sourceText"/> is null or empty.
        /// </returns>
        public string FilterWithChar(string sourceText, char replaceChar = '*')
        {
            // Nothing to filter.
            if (string.IsNullOrEmpty(sourceText))
            {
                return "2";
            }

            // A new searcher is built from the current Words on every call.
            var searcher = new WordsSearch();

            try
            {
                searcher.SetKeywords(Words);
            }
            catch (Exception)
            {
                return "0";
            }

            try
            {
                return searcher.Replace(sourceText, replaceChar);
            }
            catch (Exception)
            {
                return "1";
            }
        }

一、什么是敏感词过滤?

敏感词过滤是一种处理网络内容的技术,可以检测和过滤出网络中的敏感/违禁词汇。它通过给定的关键字或字符串,判断网络内容是否包含某些敏感信息,从而防止违反法律法规的信息流通。
通常,可以使用两种方法来过滤敏感词:

  1. 黑名单过滤:即定义一个黑名单,将所有敏感词记录在其中,然后对输入的文本进行对比,如果发现有敏感词,就将其过滤掉。
  2. 白名单过滤:即定义一个白名单,将所有不敏感的词汇记录在其中,然后对输入的文本进行对比,如果发现有不在白名单中的词汇,就将其过滤掉。

二、ToolGood.Words是什么?

ToolGood.Words是一款高性能非法词(敏感词)检测组件,附带繁体简体互换,支持全角半角互换,获取拼音首字母,获取拼音字母,拼音模糊搜索等功能。
ToolGood.Words的源码网站:https://github.com/toolgood/ToolGood.Words

三、在Visual Studio中安装ToolGood.Words

3.1、右键项目解决方案,选择“管理NuGet程序包”,如下图所示:

(截图:在项目右键菜单中选择“管理NuGet程序包”)

3.2、切换到“浏览”选项卡,搜索“ToolGood.Words”并安装:

(截图:在“浏览”选项卡中搜索并安装“ToolGood.Words”)


安装完之后最好重新编译生成项目

四、创建“subContentCheck”类

敏感/违禁词汇因特殊内容不便上传,可自行在网站上查找

using Microsoft.AspNetCore.DataProtection.KeyManagement;
using Microsoft.AspNetCore.Http;
using Microsoft.CodeAnalysis.Text;
using Newtonsoft.Json;
using System.Collections;
using System.Text;
using ToolGood.Words;
using static System.Net.Mime.MediaTypeNames;
using IHostingEnvironment = Microsoft.AspNetCore.Hosting.IHostingEnvironment;

namespace WebApplication1 //放在自己项目中时,需要更换为自己的命名空间
{
    /// <summary>
    /// Container for a list of illegal keywords.
    /// </summary>
    public class keywords
    {
        /// <summary>
        /// The illegal keywords. Initialised to an empty list so a freshly
        /// constructed instance never exposes <c>null</c>.
        /// </summary>
        public List<string> IllegalKeywords { get; set; } = new List<string>();
    }

    /// <summary>
    /// Container for a list of illegal URLs.
    /// </summary>
    public class urlwords
    {
        /// <summary>
        /// The illegal URLs. Initialised to an empty list so a freshly
        /// constructed instance never exposes <c>null</c>.
        /// </summary>
        public List<string> IllegalUrls { get; set; } = new List<string>();
    }

    /// <summary>
    /// 提交的内容敏感违禁词检查类
    /// </summary>
    public class subContentCheck
    {
        /// <summary>
        /// Hosting environment; its WebRootPath is prefixed to the word-list paths below.
        /// </summary>
        private IHostingEnvironment _hostingEnv;

        /// <summary>
        /// Path of the sensitive-word file, appended to the web root path.
        /// </summary>
        private string dictionaryPath = "/sensitiveWords/sensitiveWords.txt";

        /// <summary>
        /// Path of the file listing sensitive links, sites and URLs, appended to the web root path.
        /// </summary>
        private string urlsPath = "/sensitiveWords/IllegalUrls.txt";

        /// <summary>
        /// The loaded sensitive entries (keywords and URLs combined in one array).
        /// </summary>
        public string[] Words { get; set; }

        /// <summary>
        /// Creates the checker and immediately loads the word lists by calling <c>InitDictionary</c>.
        /// </summary>
        /// <param name="hostingEnv">Hosting environment whose <c>WebRootPath</c> is the base directory of the word-list files.</param>
        /// <exception cref="ArgumentNullException"><paramref name="hostingEnv"/> is <c>null</c>.</exception>
        public subContentCheck(IHostingEnvironment hostingEnv)
        {
            // Fail fast with a descriptive exception instead of a
            // NullReferenceException raised later inside InitDictionary.
            _hostingEnv = hostingEnv ?? throw new ArgumentNullException(nameof(hostingEnv));
            InitDictionary();
        }

        /// <summary>
        /// (Re)loads the in-memory sensitive-word list into <c>Words</c>.
        /// </summary>
        /// <remarks>
        /// Reads the keyword file and the URL file located under <c>WebRootPath</c> as UTF-8,
        /// splits every line on <c>'|'</c>, and stores the keyword entries followed by the URL
        /// entries. Entries are trimmed and blank entries are dropped, so stray whitespace
        /// around a separator never becomes a keyword of its own.
        /// Exceptions thrown by <c>System.IO.File.ReadAllLines</c> (for example when a file
        /// is missing) are not caught here; in that case <c>Words</c> is left empty.
        /// </remarks>
        public void InitDictionary()
        {
            // Reset first so a failed read leaves an empty list, never a stale one.
            Words = Array.Empty<string>();

            // Plain concatenation is intentional: the relative parts start with '/',
            // and Path.Combine would treat them as rooted and discard WebRootPath.
            string wordsPath = _hostingEnv.WebRootPath + dictionaryPath;
            string urlPath = _hostingEnv.WebRootPath + urlsPath;

            // Read both files before publishing anything to Words.
            List<string> keywordEntries = ReadEntries(wordsPath);
            List<string> urlEntries = ReadEntries(urlPath);

            var allEntries = new List<string>(keywordEntries.Count + urlEntries.Count);
            allEntries.AddRange(keywordEntries);
            allEntries.AddRange(urlEntries);
            Words = allEntries.ToArray();
        }

        /// <summary>
        /// Reads a UTF-8 text file and returns its trimmed, non-blank, '|'-separated entries in file order.
        /// </summary>
        /// <param name="path">Full path of the file to read.</param>
        private static List<string> ReadEntries(string path)
        {
            return System.IO.File.ReadAllLines(path, System.Text.Encoding.UTF8)
                .SelectMany(fileLine => fileLine.Split('|'))
                .Select(entry => entry.Trim())
                .Where(entry => entry.Length > 0)
                .ToList();
        }

        /// <summary>
        /// Replaces every sensitive word found in the text with a mask character.
        /// </summary>
        /// <param name="sourceText">Text to filter.</param>
        /// <param name="replaceChar">Character written over matched words; defaults to '*'.</param>
        /// <returns>
        /// The filtered text on success; otherwise a status code:
        /// "0" when loading the keywords into the searcher threw,
        /// "1" when the replacement itself threw,
        /// "2" when <paramref name="sourceText"/> is null or empty.
        /// </returns>
        public string FilterWithChar(string sourceText, char replaceChar = '*')
        {
            // Nothing to filter.
            if (string.IsNullOrEmpty(sourceText))
            {
                return "2";
            }

            // A new searcher is built from the current Words on every call.
            var searcher = new WordsSearch();

            try
            {
                searcher.SetKeywords(Words);
            }
            catch (Exception)
            {
                return "0";
            }

            try
            {
                return searcher.Replace(sourceText, replaceChar);
            }
            catch (Exception)
            {
                return "1";
            }
        }