import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;

/**
 * Hand-written lexical analyzer for a small Pascal-like language.
 *
 * <p>Each input line is scanned left to right. Every recognized token is printed as
 * {@code (code,token)} and its numeric code is appended to {@link #word}. Malformed
 * tokens are printed as {@code <line>line: <token> is wrong} and contribute nothing
 * to {@link #word}.
 *
 * <p>Token codes emitted by this class:
 * <ul>
 *   <li>keywords: begin=1, if=2, then=3, while=4, do=5, end=6</li>
 *   <li>identifier=10, number=11</li>
 *   <li>operators: +=13, -=14, *=15, /=16, :=17, :==18, &lt;=20, &lt;&gt;=21,
 *       &lt;==22, &gt;=23, &gt;==24, ==25 (read each entry as "lexeme=code")</li>
 *   <li>delimiters: ;=26, (=27, )=28, #=0</li>
 *   <li>string literal=6 (only produced by {@link #stringCheck(String)}, which
 *       {@link #analyze(String)} does not call; note it shares its code with "end")</li>
 * </ul>
 *
 * <p>All state is static, so the class is not safe for concurrent use.
 */
public class LexicalAnalyzer {
    /** Codes of all successfully recognized tokens, in scan order. */
    public static ArrayList<Object> word = new ArrayList<>();

    /**
     * Returns the shared token-code list.
     *
     * @return the static {@link #word} list itself (not a copy)
     */
    public  ArrayList<Object> getWord() {
        return word;
    }

    /**
     * Replaces the shared token-code list.
     *
     * @param word the list that subsequent scans will append to
     */
    public  void setWord(ArrayList<Object> word) {
        LexicalAnalyzer.word = word;
    }

    /** Reserved words of the language. */
    static String []keyWord={"begin","if","then","while","do","end"};
    /** Operators, including the two-character ones. */
    static String []operation={"+","-","*","/",":",":=","<","<>","<=",">",">=","="};
    /** Delimiters. */
    static String []symbol={";","(",")","#"};

    // The lookup lists are populated eagerly so that analyze() also works when the
    // caller never invoked init(); init() still rebuilds them from the arrays.
    static ArrayList<String> keyWords=new ArrayList<String>(Arrays.asList(keyWord));
    static ArrayList<String> operations=new ArrayList<String>(Arrays.asList(operation));
    static ArrayList<String> symbols=new ArrayList<String>(Arrays.asList(symbol));

    /** Code emitted for identifiers. */
    private static final int IDENTIFIER_CODE = 10;
    /** Code emitted for numeric constants. */
    private static final int NUMBER_CODE = 11;
    /** Code emitted for string literals. */
    private static final int STRING_CODE = 6;
    /** Input file used by {@link #main(String[])} when no path argument is given. */
    private static final String DEFAULT_SOURCE_PATH = "E:\\代码\\LexicalAnalyzer.txt";

    /** Fixed lexeme-to-code table for keywords, operators and delimiters. */
    private static final Map<String, Integer> TOKEN_CODES = new HashMap<String, Integer>();
    static {
        TOKEN_CODES.put("begin", 1);
        TOKEN_CODES.put("if", 2);
        TOKEN_CODES.put("then", 3);
        TOKEN_CODES.put("while", 4);
        TOKEN_CODES.put("do", 5);
        TOKEN_CODES.put("end", 6);
        TOKEN_CODES.put("+", 13);
        TOKEN_CODES.put("-", 14);
        TOKEN_CODES.put("*", 15);
        TOKEN_CODES.put("/", 16);
        TOKEN_CODES.put(":", 17);
        TOKEN_CODES.put(":=", 18);
        TOKEN_CODES.put("<", 20);
        TOKEN_CODES.put("<>", 21);
        TOKEN_CODES.put("<=", 22);
        TOKEN_CODES.put(">", 23);
        TOKEN_CODES.put(">=", 24);
        TOKEN_CODES.put("=", 25);
        TOKEN_CODES.put(";", 26);
        TOKEN_CODES.put("(", 27);
        TOKEN_CODES.put(")", 28);
        TOKEN_CODES.put("#", 0);
    }

    /**
     * {@code p} is the index of the character currently being scanned in the line;
     * {@code lines} is the 1-based line number used in error messages.
     */
    static int p,lines;


    /**
     * Scans a source file line by line, then prints every collected token code.
     *
     * @param args optional; {@code args[0]} is the path of the file to scan. When
     *             absent, the built-in default path is used.
     * @throws FileNotFoundException if the input file cannot be opened
     */
    public static void main(String []args) throws FileNotFoundException {
        init();
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE_PATH;
        File file=new File(path);
        lines=1;
        try(Scanner input=new Scanner(file)) {
            while (input.hasNextLine()){
                analyze(input.nextLine());
                lines++;
            }
        }
        for (Object item:word) {
            System.out.println(item);
        }
    }

    /** Rebuilds the keyword, operator and delimiter lookup lists from their arrays. */
    public static void init(){
        keyWords=new ArrayList<>();
        operations=new ArrayList<>();
        symbols=new ArrayList<>();
        Collections.addAll(keyWords, keyWord);
        Collections.addAll(operations, operation);
        Collections.addAll(symbols, symbol);
    }

    /**
     * Tokenizes one line of source text.
     *
     * <p>Dispatches on the first character of each token: digits go to
     * {@link #digitCheck(String)}, letters to {@link #letterCheck(String)}, whitespace
     * is skipped, and everything else goes to {@link #symbolCheck(String)}. Each of
     * those methods leaves {@link #p} on the last character it consumed, so the loop
     * increment here moves on to the next unread character.
     *
     * @param str the line to scan; {@code null} is treated as an empty line
     */
    public static void analyze(String str){
        p=0;
        if (str == null) {
            return;
        }
        str=str.trim();
        for (;p<str.length();p++){
            char ch=str.charAt(p);
            if (Character.isDigit(ch)){
                digitCheck(str);
            }else if (Character.isLetter(ch)){
                letterCheck(str);
            }else if (Character.isWhitespace(ch)){
                // Skip any whitespace (tabs included), not just the plain space.
                continue;
            }else {
                symbolCheck(str);
            }
        }
    }

    /**
     * Scans a numeric constant starting at {@link #p}.
     *
     * <p>The token extends over the following letters and digits. If it contains any
     * letter it is reported as wrong; otherwise it is emitted with code 11. A '.' ends
     * the token, so decimal fractions are not recognized as a single number.
     * On return {@link #p} is the index of the token's last character.
     *
     * @param str the line being scanned
     */
    public static void digitCheck(String str){
        int start = p;
        int end = start + 1;
        boolean err=false;
        while (end < str.length() && Character.isLetterOrDigit(str.charAt(end))) {
            if (Character.isLetter(str.charAt(end))) {
                err = true;   // a letter inside a number, e.g. "12ab"
            }
            end++;
        }
        String token = str.substring(start, end);
        if (err){
            reportWrong(token);
        }else {
            emit(NUMBER_CODE, token);
        }
        p = end - 1;
    }

    /**
     * Scans an identifier or keyword starting at {@link #p}.
     *
     * <p>The token extends over the following letters and digits. Keywords are emitted
     * with their own code, anything else as an identifier (code 10).
     * On return {@link #p} is the index of the token's last character.
     *
     * @param str the line being scanned
     */
    public static void letterCheck(String str){
        int start = p;
        int end = start + 1;
        while (end < str.length() && Character.isLetterOrDigit(str.charAt(end))) {
            end++;
        }
        String token = str.substring(start, end);
        if (keyWords.contains(token)){
            emitKnown(token);
        }else {
            emit(IDENTIFIER_CODE, token);
        }
        p = end - 1;
    }

    /**
     * Scans a delimiter or operator starting at {@link #p}.
     *
     * <p>Two-character operators (":=", "&lt;&gt;", "&lt;=", "&gt;=") take precedence
     * over their one-character prefix. A character that is neither a delimiter nor an
     * operator is reported as wrong and consumed on its own.
     * On return {@link #p} is the index of the token's last character.
     *
     * @param str the line being scanned
     */
    public static void symbolCheck(String str){
        String token= String.valueOf(str.charAt(p));
        if (symbols.contains(token)){
            emitKnown(token);
            return;
        }
        if (!operations.contains(token)){
            // Unknown character: report it instead of silently dropping it together
            // with the character that follows.
            reportWrong(token);
            return;
        }
        // Look ahead only when there is a next character; an operator may legally be
        // the last character of the line.
        if (p + 1 < str.length()){
            String pair = token + str.charAt(p + 1);
            if (operations.contains(pair)){
                emitKnown(pair);
                p++;   // the second character belongs to this token
                return;
            }
        }
        emitKnown(token);
    }

    /**
     * Scans a double-quoted string literal whose opening quote is at {@link #p}.
     *
     * <p>A literal that reaches the end of the line without a closing quote is
     * reported as wrong; a terminated one is emitted with code 6. On return
     * {@link #p} is the index of the closing quote, or {@code str.length()} when the
     * literal is unterminated. Not called by {@link #analyze(String)}.
     *
     * @param str the line being scanned
     */
    public static void stringCheck(String str){
        StringBuilder token = new StringBuilder();
        token.append(str.charAt(p++));
        boolean closed = false;
        for (;p<str.length();p++){
            char ch=str.charAt(p);
            token.append(ch);
            if (ch=='"'){
                closed = true;
                break;
            }
        }
        // Track the closing quote explicitly so that a lone opening quote is not
        // mistaken for a complete literal.
        if (!closed){
            reportWrong(token.toString());
        }else {
            emit(STRING_CODE, token.toString());
        }
    }

    /** Prints {@code (code,token)} and records the code in {@link #word}. */
    private static void emit(int code, String token){
        System.out.println("("+code+","+token+")");
        word.add(code);
    }

    /** Emits a keyword, operator or delimiter using the fixed code table; lexemes without a code are ignored. */
    private static void emitKnown(String token){
        Integer code = TOKEN_CODES.get(token);
        if (code != null){
            emit(code, token);
        }
    }

    /** Prints the error line for a malformed token on the current input line. */
    private static void reportWrong(String token){
        System.out.println(lines+"line"+": "+token+" is wrong");
    }
}

// If you need syntax analysis, see the companion syntax analyzer.