定义
子字符串查找:给定一段长度为N的文本和一个长度为M的模式(pattern)字符串,在文本中找到一个和该模式相符的子字符串。
暴力子字符串查找算法
package algorithm.substring;
/**
 * Brute-force substring search, written in two equivalent formulations.
 * Created by zjw on 2021/9/5 15:29
 */
public class CommonSearchSubstring {

    /**
     * Brute-force search: tries every alignment of the pattern against the text
     * and compares character by character from the left.
     *
     * @param pat the pattern to look for
     * @param txt the text to scan
     * @return the index of the first occurrence of {@code pat} in {@code txt},
     *         or {@code txt.length()} when there is no occurrence
     */
    public static int search(String pat, String txt) {
        final int patLen = pat.length();
        final int txtLen = txt.length();
        final int lastStart = txtLen - patLen;

        int start = 0;
        while (start <= lastStart) {
            // Count how many leading pattern characters agree at this alignment.
            int matched = 0;
            while (matched < patLen && txt.charAt(start + matched) == pat.charAt(matched)) {
                matched++;
            }
            if (matched == patLen) {
                return start; // whole pattern matched
            }
            start++;
        }
        return txtLen; // no alignment matched
    }

    /**
     * Brute-force search with explicit backup: a single text cursor advances
     * while characters match and is rewound by the number of matched
     * characters on a mismatch.
     *
     * @param pat the pattern to look for
     * @param txt the text to scan
     * @return the index of the first occurrence of {@code pat} in {@code txt},
     *         or {@code txt.length()} when there is no occurrence
     */
    public static int search2(String pat, String txt) {
        final int patLen = pat.length();
        final int txtLen = txt.length();

        int pos = 0;      // cursor into the text
        int matched = 0;  // number of pattern characters matched so far
        while (pos < txtLen && matched < patLen) {
            if (txt.charAt(pos) != pat.charAt(matched)) {
                // Rewind to the start of the failed alignment; the increment
                // below then moves on to the next alignment.
                pos -= matched;
                matched = 0;
            } else {
                matched++;
            }
            pos++;
        }
        return matched == patLen ? pos - patLen : txtLen;
    }
}
Knuth-Morris-Pratt字符串查找算法
package algorithm.substring;
/**
 * Knuth-Morris-Pratt substring search driven by a precomputed DFA.
 * Created by zjw on 2021/9/5 15:39
 */
public class KMP {
    private final int R;        // radix (alphabet size)
    private final int m;        // pattern length
    private final int[][] dfa;  // dfa[c][j] = next state after reading character c in state j

    /**
     * Preprocesses the pattern using a 256-character alphabet.
     *
     * @param pat the pattern; may be empty, in which case every search returns 0
     * @throws ArrayIndexOutOfBoundsException if the pattern contains a character {@code >= 256}
     */
    public KMP(String pat) {
        this(pat.toCharArray(), 256);
    }

    /**
     * Preprocesses the pattern using an alphabet of the given size.
     *
     * @param pattern the pattern; may be empty, in which case every search returns 0
     * @param R       the alphabet size
     * @throws ArrayIndexOutOfBoundsException if the pattern contains a character {@code >= R}
     */
    public KMP(char[] pattern, int R) {
        this.R = R;
        this.m = pattern.length;
        this.dfa = buildDfa(pattern, R);
    }

    /** Builds the KMP automaton for {@code pattern} over an alphabet of {@code radix} characters. */
    private static int[][] buildDfa(char[] pattern, int radix) {
        int length = pattern.length;
        int[][] table = new int[radix][length];
        if (length == 0) {
            return table; // empty pattern: no states to fill, search() matches at 0
        }
        table[pattern[0]][0] = 1;
        for (int x = 0, j = 1; j < length; j++) {
            for (int c = 0; c < radix; c++) {
                table[c][j] = table[c][x];  // mismatch cases: copy from restart state
            }
            table[pattern[j]][j] = j + 1;   // match case
            x = table[pattern[j]][x];       // update restart state
        }
        return table;
    }

    /**
     * Returns the DFA state reached from {@code state} on character {@code c}.
     * A character outside the alphabet cannot occur in the pattern (the
     * constructor would have rejected it), so it always resets to state 0.
     */
    private int next(int state, char c) {
        return c < R ? dfa[c][state] : 0;
    }

    /**
     * Finds the first occurrence of the pattern in the text.
     *
     * @param txt the text to scan; may contain characters outside the alphabet
     * @return the index of the first occurrence, or {@code txt.length()} if there is none
     */
    public int search(String txt) {
        int n = txt.length();
        int i, j;
        for (i = 0, j = 0; i < n && j < m; i++) {
            j = next(j, txt.charAt(i));
        }
        return j == m ? i - m : n;
    }

    /**
     * Finds the first occurrence of the pattern in the text.
     *
     * @param text the text to scan; may contain characters outside the alphabet
     * @return the index of the first occurrence, or {@code text.length} if there is none
     */
    public int search(char[] text) {
        int n = text.length;
        int i, j;
        for (i = 0, j = 0; i < n && j < m; i++) {
            j = next(j, text[i]);
        }
        return j == m ? i - m : n;
    }
}
Boyer-Moore字符串匹配算法
package algorithm.substring;
/**
 * Boyer-Moore substring search using the bad-character (mismatched character) heuristic.
 * Created by zjw on 2021/9/5 15:43
 */
public class BoyerMoore {
    private final int R;           // radix (alphabet size)
    private final int[] right;     // right[c] = rightmost index of c in the pattern, or -1
    private final char[] pattern;  // private copy of the pattern, used by both search overloads

    /**
     * Preprocesses the pattern using a 256-character alphabet.
     *
     * @param pat the pattern
     * @throws ArrayIndexOutOfBoundsException if the pattern contains a character {@code >= 256}
     */
    public BoyerMoore(String pat) {
        this(pat.toCharArray(), 256);
    }

    /**
     * Preprocesses the pattern using an alphabet of the given size.
     *
     * @param pattern the pattern; it is copied, so later changes to the array have no effect
     * @param R       the alphabet size
     * @throws ArrayIndexOutOfBoundsException if the pattern contains a character {@code >= R}
     */
    public BoyerMoore(char[] pattern, int R) {
        this.R = R;
        this.pattern = pattern.clone();

        // Record the rightmost position of every pattern character.
        this.right = new int[R];
        for (int c = 0; c < R; c++) {
            right[c] = -1;
        }
        for (int j = 0; j < pattern.length; j++) {
            right[pattern[j]] = j;
        }
    }

    /**
     * Returns the rightmost pattern index of {@code c}, or -1 when {@code c}
     * does not occur in the pattern. Characters outside the alphabet cannot
     * occur in the pattern, so they also yield -1.
     */
    private int rightmost(char c) {
        return c < R ? right[c] : -1;
    }

    /**
     * Finds the first occurrence of the pattern in the text.
     *
     * @param txt the text to scan; may contain characters outside the alphabet
     * @return the index of the first occurrence, or {@code txt.length()} if there is none
     */
    public int search(String txt) {
        int m = pattern.length;
        int n = txt.length();
        int skip;
        for (int i = 0; i <= n - m; i += skip) {
            skip = 0;
            // Compare right to left; on a mismatch, shift so the text character
            // lines up with its rightmost occurrence in the pattern (at least 1).
            for (int j = m - 1; j >= 0; j--) {
                char c = txt.charAt(i + j);
                if (pattern[j] != c) {
                    skip = Math.max(1, j - rightmost(c));
                    break;
                }
            }
            if (skip == 0) {
                return i; // found
            }
        }
        return n; // not found
    }

    /**
     * Finds the first occurrence of the pattern in the text.
     *
     * @param text the text to scan; may contain characters outside the alphabet
     * @return the index of the first occurrence, or {@code text.length} if there is none
     */
    public int search(char[] text) {
        int m = pattern.length;
        int n = text.length;
        int skip;
        for (int i = 0; i <= n - m; i += skip) {
            skip = 0;
            for (int j = m - 1; j >= 0; j--) {
                char c = text[i + j];
                if (pattern[j] != c) {
                    skip = Math.max(1, j - rightmost(c));
                    break;
                }
            }
            if (skip == 0) {
                return i; // found
            }
        }
        return n; // not found
    }
}
Rabin-Karp指纹字符串查找算法
package algorithm.substring;
import java.math.BigInteger;
import java.util.Random;
/**
 * Rabin-Karp fingerprint substring search (Las Vegas version: every hash hit
 * is verified character by character, so results are always exact).
 * Created by zjw on 2021/9/5 15:46
 */
public class RabinKarp {
    private final String pat;     // the pattern, kept for verifying hash hits
    private final long patHash;   // hash of the pattern
    private final int m;          // pattern length
    private final long q;         // a 31-bit prime modulus, small enough that products fit in a long
    private final int R;          // radix
    private final long RM;        // R^(m-1) % q, the weight of the leading character

    /**
     * Preprocesses the pattern using the given radix.
     *
     * @param pattern the pattern
     * @param R       the radix used for hashing; must be positive
     * @throws IllegalArgumentException if {@code R} is not positive
     */
    public RabinKarp(char[] pattern, int R) {
        this(String.valueOf(pattern), R);
    }

    /**
     * Preprocesses the pattern using radix 256.
     *
     * @param pat the pattern
     */
    public RabinKarp(String pat) {
        this(pat, 256);
    }

    /** Shared initialization for both public constructors. */
    private RabinKarp(String pat, int R) {
        if (R <= 0) {
            throw new IllegalArgumentException("radix must be positive: " + R);
        }
        this.pat = pat;
        this.R = R;
        this.m = pat.length();
        this.q = longRandomPrime();

        // Precompute R^(m-1) % q, used to remove the leading character from the rolling hash.
        long weight = 1;
        for (int i = 1; i <= m - 1; i++) {
            weight = (R * weight) % q;
        }
        this.RM = weight;
        this.patHash = hash(pat, m);
    }

    /** Computes the hash of {@code key[0..m-1]}. */
    private long hash(String key, int m) {
        long h = 0;
        for (int j = 0; j < m; j++) {
            h = (R * h + key.charAt(j)) % q;
        }
        return h;
    }

    /** Returns whether the pattern equals {@code txt[i..i+m-1]}. */
    private boolean check(String txt, int i) {
        for (int j = 0; j < m; j++) {
            if (pat.charAt(j) != txt.charAt(i + j)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Finds the first occurrence of the pattern in the text.
     *
     * @param txt the text to scan
     * @return the index of the first occurrence, or {@code txt.length()} if there is none
     */
    public int search(String txt) {
        int n = txt.length();
        if (n < m) {
            return n;
        }
        long txtHash = hash(txt, m);

        // Check for a match at offset 0.
        if ((patHash == txtHash) && check(txt, 0)) {
            return 0;
        }

        // Roll the hash across the text; verify exactly whenever the hashes agree.
        for (int i = m; i < n; i++) {
            // Remove the leading character (adding q keeps the value non-negative),
            // then append the trailing character.
            txtHash = (txtHash + q - RM * txt.charAt(i - m) % q) % q;
            txtHash = (txtHash * R + txt.charAt(i)) % q;

            int offset = i - m + 1;
            if ((patHash == txtHash) && check(txt, offset)) {
                return offset;
            }
        }

        // No match.
        return n;
    }

    /** Returns a random 31-bit (probable) prime. */
    private static long longRandomPrime() {
        BigInteger prime = BigInteger.probablePrime(31, new Random());
        return prime.longValue();
    }
}