定义

子字符串查找:给定一段长度为N的文本和一个长度为M的模式(pattern)字符串,在文本中找到一个和该模式相符的子字符串。

暴力子字符串查找算法

package algorithm.substring;

/**
 * Brute-force substring search routines.
 * Created by zjw on 2021/9/5 15:29
 */
public class CommonSearchSubstring {

    /**
     * Brute-force substring search: tries every alignment of the pattern
     * against the text, comparing characters left to right.
     *
     * @param pat the pattern to look for
     * @param txt the text to scan
     * @return the index of the first occurrence of {@code pat} in {@code txt},
     *         or {@code txt.length()} when there is no occurrence
     */
    public static int search(String pat, String txt) {
        final int patLen = pat.length();
        final int txtLen = txt.length();
        final int lastStart = txtLen - patLen;

        int start = 0;
        while (start <= lastStart) {
            // Count how many leading pattern characters agree at this alignment.
            int matched = 0;
            while (matched < patLen && txt.charAt(start + matched) == pat.charAt(matched)) {
                matched++;
            }
            if (matched == patLen) {
                return start; // whole pattern matched
            }
            start++;
        }
        return txtLen; // no match
    }

    /**
     * Brute-force substring search with explicit backup: a single text cursor
     * is advanced and, on a mismatch, moved back to just after the start of
     * the failed alignment.
     *
     * @param pat the pattern to look for
     * @param txt the text to scan
     * @return the index of the first occurrence of {@code pat} in {@code txt},
     *         or {@code txt.length()} when there is no occurrence
     */
    public static int search2(String pat, String txt) {
        final int patLen = pat.length();
        final int txtLen = txt.length();

        int txtPos = 0;
        int patPos = 0;
        while (txtPos < txtLen && patPos < patLen) {
            if (txt.charAt(txtPos) != pat.charAt(patPos)) {
                // Back up to the start of this alignment; the increment below
                // then moves on to the next alignment.
                txtPos -= patPos;
                patPos = 0;
            } else {
                patPos++;
            }
            txtPos++;
        }
        return (patPos == patLen) ? txtPos - patLen : txtLen;
    }

}

Knuth-Morris-Pratt字符串查找算法

package algorithm.substring;


/**
 * Knuth-Morris-Pratt substring search, implemented as a deterministic finite
 * automaton (DFA) built from the pattern.
 * Created by zjw on 2021/9/5 15:39
 */
public class KMP {
    private final int R;         // radix (alphabet size)
    private final int m;         // pattern length
    private final int[][] dfa;   // dfa[c][j]: next state after reading c in state j

    /**
     * Builds the automaton for a pattern over the 256-character alphabet.
     *
     * @param pat the pattern string; may be empty, in which case every search
     *            reports a match at offset 0
     * @throws IllegalArgumentException if the pattern contains a character
     *         with code 256 or greater
     */
    public KMP(String pat) {
        this(pat.toCharArray(), 256);
    }

    /**
     * Builds the automaton for a pattern over an alphabet of size {@code R}.
     *
     * @param pattern the pattern characters; may be empty
     * @param R       the alphabet size; every pattern character must be
     *                smaller than {@code R}
     * @throws IllegalArgumentException if a pattern character is not smaller
     *         than {@code R}
     */
    public KMP(char[] pattern, int R) {
        this.R = R;
        this.m = pattern.length;
        this.dfa = new int[R][m];

        // An empty pattern has no states to build; search() then matches at 0.
        if (m == 0) {
            return;
        }
        for (char c : pattern) {
            if (c >= R) {
                throw new IllegalArgumentException(
                        "pattern character with code " + (int) c + " is outside the radix " + R);
            }
        }

        // Build the DFA from the pattern.
        dfa[pattern[0]][0] = 1;
        for (int x = 0, j = 1; j < m; j++) {
            for (int c = 0; c < R; c++) {
                dfa[c][j] = dfa[c][x];     // mismatch cases: copy from restart state
            }
            dfa[pattern[j]][j] = j + 1;    // match case: advance
            x = dfa[pattern[j]][x];        // update restart state
        }
    }

    /**
     * Returns the state reached after reading {@code c} in state {@code state}.
     * A character outside the alphabet cannot occur in the pattern, so it
     * always resets the automaton to state 0.
     */
    private int next(char c, int state) {
        return c < R ? dfa[c][state] : 0;
    }

    /**
     * Finds the first occurrence of the pattern in the text.
     *
     * @param txt the text to scan
     * @return the index of the first match, or {@code txt.length()} if the
     *         pattern does not occur
     */
    public int search(String txt) {
        // Simulate the DFA on the text.
        int n = txt.length();
        int i, j;
        for (i = 0, j = 0; i < n && j < m; i++) {
            j = next(txt.charAt(i), j);
        }
        if (j == m) return i - m;    // found
        return n;                    // not found
    }

    /**
     * Finds the first occurrence of the pattern in the text.
     *
     * @param text the text to scan
     * @return the index of the first match, or {@code text.length} if the
     *         pattern does not occur
     */
    public int search(char[] text) {
        // Simulate the DFA on the text.
        int n = text.length;
        int i, j;
        for (i = 0, j = 0; i < n && j < m; i++) {
            j = next(text[i], j);
        }
        if (j == m) return i - m;    // found
        return n;                    // not found
    }
}

Boyer-Moore字符串匹配算法

package algorithm.substring;

/**
 * Boyer-Moore substring search using the mismatched-character heuristic:
 * the pattern is compared right to left and, on a mismatch, shifted so that
 * the mismatched text character lines up with its rightmost occurrence in the
 * pattern.
 * Created by zjw on 2021/9/5 15:43
 */
public class BoyerMoore {
    private final int R;          // radix (alphabet size)
    private final int[] right;    // right[c]: rightmost index of c in the pattern, or -1
    private final char[] pattern; // private copy of the pattern, used by both search overloads

    /**
     * Preprocesses a pattern over the 256-character alphabet.
     *
     * @param pat the pattern string
     * @throws IllegalArgumentException if the pattern contains a character
     *         with code 256 or greater
     */
    public BoyerMoore(String pat) {
        this(pat.toCharArray(), 256);
    }

    /**
     * Preprocesses a pattern over an alphabet of size {@code R}.
     *
     * @param pattern the pattern characters (copied, so later changes to the
     *                argument do not affect this object)
     * @param R       the alphabet size; every pattern character must be
     *                smaller than {@code R}
     * @throws IllegalArgumentException if a pattern character is not smaller
     *         than {@code R}
     */
    public BoyerMoore(char[] pattern, int R) {
        this.R = R;
        this.pattern = pattern.clone();

        // Position of the rightmost occurrence of each character in the pattern.
        right = new int[R];
        for (int c = 0; c < R; c++) {
            right[c] = -1;
        }
        for (int j = 0; j < pattern.length; j++) {
            char c = pattern[j];
            if (c >= R) {
                throw new IllegalArgumentException(
                        "pattern character with code " + (int) c + " is outside the radix " + R);
            }
            right[c] = j;
        }
    }

    /**
     * Computes the shift after a mismatch at pattern index {@code j} against
     * text character {@code c}. A character outside the alphabet cannot occur
     * in the pattern, so it is treated like any other absent character (-1).
     */
    private int skipFor(int j, char c) {
        int rightmost = c < R ? right[c] : -1;
        return Math.max(1, j - rightmost);
    }

    /**
     * Finds the first occurrence of the pattern in the text.
     *
     * @param txt the text to scan
     * @return the index of the first match, or {@code txt.length()} if the
     *         pattern does not occur
     */
    public int search(String txt) {
        int m = pattern.length;
        int n = txt.length();
        int skip;
        for (int i = 0; i <= n - m; i += skip) {
            skip = 0;
            for (int j = m - 1; j >= 0; j--) {
                if (pattern[j] != txt.charAt(i + j)) {
                    skip = skipFor(j, txt.charAt(i + j));
                    break;
                }
            }
            if (skip == 0) return i;    // found
        }
        return n;                       // not found
    }

    /**
     * Finds the first occurrence of the pattern in the text.
     *
     * @param text the text to scan
     * @return the index of the first match, or {@code text.length} if the
     *         pattern does not occur
     */
    public int search(char[] text) {
        int m = pattern.length;
        int n = text.length;
        int skip;
        for (int i = 0; i <= n - m; i += skip) {
            skip = 0;
            for (int j = m - 1; j >= 0; j--) {
                if (pattern[j] != text[i + j]) {
                    skip = skipFor(j, text[i + j]);
                    break;
                }
            }
            if (skip == 0) return i;    // found
        }
        return n;                       // not found
    }
}

Rabin-Karp指纹字符串查找算法

package algorithm.substring;

import java.math.BigInteger;
import java.util.Random;

/**
 * Rabin-Karp fingerprint substring search: a rolling hash of each
 * pattern-length window of the text is compared with the pattern's hash, and
 * every hash hit is confirmed character by character.
 * Created by zjw on 2021/9/5 15:46
 */
public class RabinKarp {
    private final String pat;      // the pattern, kept for confirming hash hits
    private final long patHash;    // hash of the pattern
    private final int m;           // pattern length
    private final long q;          // a random 31-bit prime, small enough that products fit in a long
    private final int R;           // radix
    private final long RM;         // R^(m-1) % q

    /**
     * Preprocesses a pattern using the given radix for the hash.
     *
     * @param pattern the pattern characters
     * @param R       the radix used by the polynomial hash; must be positive
     * @throws IllegalArgumentException if {@code R} is not positive
     */
    public RabinKarp(char[] pattern, int R) {
        if (R <= 0) {
            // A non-positive radix would yield negative remainders and break
            // the equality test between rolling hashes.
            throw new IllegalArgumentException("radix must be positive: " + R);
        }
        this.pat = String.valueOf(pattern);
        this.R = R;
        this.m = pattern.length;
        this.q = longRandomPrime();

        // Precompute R^(m-1) % q, used to remove the leading character.
        long rm = 1;
        for (int i = 1; i <= m - 1; i++) {
            rm = (R * rm) % q;
        }
        this.RM = rm;
        this.patHash = hash(pat, m);
    }

    /**
     * Preprocesses a pattern using radix 256.
     *
     * @param pat the pattern string
     */
    public RabinKarp(String pat) {
        this(pat.toCharArray(), 256);
    }

    // Computes the hash of key[0..m-1].
    private long hash(String key, int m) {
        long h = 0;
        for (int j = 0; j < m; j++) {
            h = (R * h + key.charAt(j)) % q;
        }
        return h;
    }

    // Does the pattern match txt[i..i+m-1] exactly?
    private boolean check(String txt, int i) {
        for (int j = 0; j < m; j++) {
            if (pat.charAt(j) != txt.charAt(i + j)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Finds the first occurrence of the pattern in the text.
     *
     * @param txt the text to scan
     * @return the index of the first match, or {@code txt.length()} if the
     *         pattern does not occur
     */
    public int search(String txt) {
        int n = txt.length();
        if (n < m) return n;
        long txtHash = hash(txt, m);

        // Check for a match at offset 0.
        if ((patHash == txtHash) && check(txt, 0)) {
            return 0;
        }

        // Roll the hash across the text; confirm every hash hit exactly.
        for (int i = m; i < n; i++) {
            // Remove the leading character, then append the trailing one.
            // Adding q first keeps the intermediate value non-negative.
            txtHash = (txtHash + q - (RM * txt.charAt(i - m)) % q) % q;
            txtHash = (txtHash * R + txt.charAt(i)) % q;

            int offset = i - m + 1;
            if ((patHash == txtHash) && check(txt, offset)) {
                return offset;
            }
        }

        // No match.
        return n;
    }

    // Returns a random 31-bit probable prime.
    private static long longRandomPrime() {
        BigInteger prime = BigInteger.probablePrime(31, new Random());
        return prime.longValue();
    }
}