import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
/**
 * Keyword filter: detects configured keywords in a text and replaces each
 * occurrence with its configured replacement string.
 *
 * <p>The dictionary is stored as a character trie shared by all callers
 * (static state). {@link #init()} loads the built-in dictionary and
 * {@link #init(List, List)} loads a caller-supplied one. Until one of them has
 * been called the dictionary is empty, so {@link #hasFilterWord(String)}
 * returns {@code false} and {@link #getFilterString(String)} returns its
 * input unchanged.
 *
 * <p>Each {@code init} call builds a fresh trie and publishes it with a single
 * volatile write; a published trie is never modified afterwards, so a lookup
 * works against either the previous or the new dictionary, never a
 * half-built one.
 */
public class KeywordFilterService {

    /** Initial capacity of the root node's child table. */
    private static final int ROOT_CAPACITY = 1024;

    /** One trie node: the children keyed by next character, plus the match payload. */
    private static final class Node {
        /** Child nodes keyed by the next character of the keyword. */
        final Map<Character, Node> children;
        /** Replacement text when a keyword ends at this node; {@code null} otherwise. */
        String replacement;

        Node(int initialCapacity) {
            children = new HashMap<Character, Node>(initialCapacity);
        }
    }

    /** Root of the currently published trie; replaced as a whole by {@code init}. */
    private static volatile Node root = new Node(ROOT_CAPACITY);

    /**
     * Loads the built-in dictionary, discarding whatever was loaded before.
     */
    public void init() {
        List<String> filterWords = new ArrayList<String>();
        List<String> replaceWords = new ArrayList<String>();
        // Load the filter dictionary.
        filterWords.add("唉呀妈呀");
        replaceWords.add("xxxx");
        filterWords.add("唉呀");
        replaceWords.add("oo");
        init(filterWords, replaceWords);
    }

    /**
     * Loads a caller-supplied dictionary, discarding whatever was loaded before.
     *
     * <p>Keywords are trimmed before being indexed; {@code null} or blank
     * keywords are skipped. If the same keyword appears more than once, the
     * first entry's replacement is used. A {@code null} replacement is treated
     * as the empty string, i.e. the keyword is removed from the text.
     *
     * @param filterWords  keywords to detect
     * @param replaceWords replacement for the keyword at the same index
     * @throws IllegalArgumentException if either list is {@code null} or the
     *         two lists differ in size
     */
    public void init(List<String> filterWords, List<String> replaceWords) {
        if (filterWords == null || replaceWords == null) {
            throw new IllegalArgumentException("filterWords and replaceWords must not be null");
        }
        if (filterWords.size() != replaceWords.size()) {
            throw new IllegalArgumentException("filterWords has " + filterWords.size()
                    + " entries but replaceWords has " + replaceWords.size());
        }

        Node newRoot = new Node(ROOT_CAPACITY);
        for (int k = 0; k < filterWords.size(); k++) {
            String word = filterWords.get(k);
            if (word == null) {
                continue;
            }
            char[] chars = word.trim().toCharArray();
            if (chars.length == 0) {
                continue;
            }

            // Walk/create the path for this keyword.
            Node node = newRoot;
            for (int depth = 0; depth < chars.length; depth++) {
                Character key = Character.valueOf(chars[depth]);
                Node child = node.children.get(key);
                if (child == null) {
                    child = new Node(childCapacity(depth));
                    node.children.put(key, child);
                }
                node = child;
            }

            // Mark the end of the keyword; the first registration wins.
            if (node.replacement == null) {
                String replacement = replaceWords.get(k);
                node.replacement = (replacement == null) ? "" : replacement;
            }
        }

        // Publish only the fully built trie.
        root = newRoot;
    }

    /**
     * Reports whether the text contains at least one keyword. Returns as soon
     * as the shortest keyword at any start position has been matched.
     *
     * @param info text to inspect; may be {@code null}
     * @return {@code true} if a keyword occurs anywhere in {@code info};
     *         {@code false} if not, if {@code info} is {@code null} or blank,
     *         or if the dictionary is empty
     */
    public static boolean hasFilterWord(String info) {
        Node trie = root;
        if (isBlank(info) || trie.children.isEmpty()) {
            return false;
        }
        int len = info.length();
        for (int start = 0; start < len; start++) {
            Node node = trie;
            for (int pos = start; pos < len; pos++) {
                node = node.children.get(Character.valueOf(info.charAt(pos)));
                if (node == null) {
                    // Dead end for this start position; try the next one.
                    break;
                }
                if (node.replacement != null) {
                    return true;
                }
            }
            // Running off the end of the text only ends this attempt: a shorter
            // keyword may still begin at a later start position.
        }
        return false;
    }

    /**
     * Replaces every keyword occurrence in the text with its replacement.
     * Scanning is left to right; at each position the longest keyword starting
     * there is replaced, and scanning resumes right after it.
     *
     * @param info text to filter; may be {@code null}
     * @return the filtered text, or {@code info} itself when it is
     *         {@code null} or blank or when the dictionary is empty
     */
    public static String getFilterString(String info) {
        Node trie = root;
        if (isBlank(info) || trie.children.isEmpty()) {
            return info;
        }
        int len = info.length();
        StringBuilder out = new StringBuilder(len);
        int i = 0;
        while (i < len) {
            // Find the longest keyword that starts at position i.
            Node node = trie;
            int matchEnd = -1;
            String replacement = null;
            for (int pos = i; pos < len; pos++) {
                node = node.children.get(Character.valueOf(info.charAt(pos)));
                if (node == null) {
                    break;
                }
                if (node.replacement != null) {
                    matchEnd = pos;
                    replacement = node.replacement;
                }
            }

            if (matchEnd < 0) {
                // No keyword starts here: copy one character and move on.
                out.append(info.charAt(i));
                i++;
            } else {
                out.append(replacement);
                i = matchEnd + 1;
            }
        }
        return out.toString();
    }

    /**
     * Initial child-table capacity for a node created for the character at
     * index {@code depth} of a keyword: 16, 8, 4, then 2 for every deeper level.
     */
    private static int childCapacity(int depth) {
        return depth >= 3 ? 2 : 16 >> depth;
    }

    /**
     * Returns {@code true} when {@code s} is {@code null}, empty, or consists
     * only of characters for which {@link Character#isWhitespace(char)} holds.
     */
    private static boolean isBlank(String s) {
        if (s == null) {
            return true;
        }
        for (int k = 0; k < s.length(); k++) {
            if (!Character.isWhitespace(s.charAt(k))) {
                return false;
            }
        }
        return true;
    }

    /** Small demo: loads the built-in dictionary and prints one filtered string. */
    public static void main(String[] args) {
        KeywordFilterService filterService = new KeywordFilterService();
        filterService.init();
        System.out.println(KeywordFilterService.getFilterString("唉呀妈呀aa你妈呀唉呀呀呀唉呀呀呀唉呀呀妈呀唉呀妈呀呀呀呀唉呀呀呀唉呀呀"));
    }
}