编译原理词法分析

原创

wx61c3ecc946c9a 2022-01-02 15:42:01 博主文章分类：本科课程 ©著作权

©著作权归作者所有：来自51CTO博客作者wx61c3ecc946c9a的原创作品，请联系作者获取转载授权，否则将追究法律责任

本次实验从input.txt文件中读取一段程序，输出二元式到output.txt文件中。
代码中的文件路径请自己修改。
使用java实现。

Main类

每次调用词法分析获取一个单词，并输出到文件中。

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;

/**
 * Driver of the lexical-analysis experiment.
 *
 * <p>Reads a program from an input file, repeatedly asks the scanner for the next word and
 * writes one line per word to an output file in the form {@code <typeNum>\t<strToken>\n}.
 *
 * <p>Usage: {@code java Main [inputPath [outputPath]]}. When an argument is omitted the
 * corresponding default path below is used.
 */
public class Main {

  /** Input file used when no first command-line argument is given. */
  private static final String DEFAULT_INPUT_PATH =
      "D:\\WorkSpace\\java\\ComplierExperiment\\src\\complierExperiment1\\input.txt";

  /** Output file used when no second command-line argument is given. */
  private static final String DEFAULT_OUTPUT_PATH =
      "D:\\WorkSpace\\java\\ComplierExperiment\\src\\complierExperiment1\\output.txt";

  /**
   * Runs the scanner over the input file and writes every recognized tuple to the output file.
   *
   * @param args optional overrides: {@code args[0]} is the input path, {@code args[1]} the
   *     output path
   */
  public static void main(String[] args) {
    String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
    String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

    // try-with-resources closes both streams on every exit path. The input is opened first,
    // so a missing input file does not create or truncate the output file.
    // NOTE(review): FileReader/FileWriter use the platform default charset here.
    try (FileReader input = new FileReader(inputPath);
        FileWriter output = new FileWriter(outputPath)) {
      // All characters of the source program.
      char[] charset = readAll(input);
      // Number of characters read from the file.
      int codeNumber = charset.length;
      CodeSannner sc = new CodeSannner(codeNumber, charset);
      // Keep scanning until the scanner has consumed every character.
      while (sc.getIndex() < codeNumber) {
        // Scan one word and obtain its (category, lexeme) tuple.
        Tuple tuple = sc.scan();
        output.write(tuple.getTypeNum() + "\t" + tuple.getStrToken() + "\n");
        // Reset the scanner's lexeme buffer before the next word.
        sc.clear();
      }
      output.flush();
    } catch (IOException e) {
      // Also covers FileNotFoundException, which is a subclass of IOException.
      e.printStackTrace();
    }
  }

  /**
   * Reads every remaining character from {@code reader}.
   *
   * <p>A single {@code read(char[])} call may return fewer characters than are available, so
   * the call is repeated until it reports end of stream ({@code -1}).
   *
   * @param reader the open reader to drain
   * @return all characters read, in order; an empty array for an empty file
   * @throws IOException if reading fails
   */
  private static char[] readAll(FileReader reader) throws IOException {
    StringBuilder text = new StringBuilder();
    char[] buffer = new char[8192];
    int count;
    while ((count = reader.read(buffer)) != -1) {
      text.append(buffer, 0, count);
    }
    char[] result = new char[text.length()];
    text.getChars(0, text.length(), result, 0);
    return result;
  }

}

Tuple类

表示二元组

/**
 * Result of recognizing one word: its category code together with its text.
 */
public class Tuple {

  /** Category code of the word. */
  private int typeNum;

  /** Text of the word. */
  private String strToken;

  /**
   * Creates a tuple from a category code and the word's text.
   *
   * @param typeNum category code
   * @param strToken text of the word
   */
  public Tuple(int typeNum, String strToken) {
    this.strToken = strToken;
    this.typeNum = typeNum;
  }

  /** @return the category code */
  public int getTypeNum() {
    return this.typeNum;
  }

  /** @return the text of the word */
  public String getStrToken() {
    return this.strToken;
  }

  /** @param typeNum the new category code */
  public void setTypeNum(int typeNum) {
    this.typeNum = typeNum;
  }

  /** @param strToken the new text of the word */
  public void setStrToken(String strToken) {
    this.strToken = strToken;
  }

}

CodeScanner类（代码中的类名写作CodeSannner）

核心总控程序是其中的scan方法，请仔细查看。

1. 总控程序流程图，相当于scan方法的总体思路

编译原理词法分析_运算符

2. 识别无符号数流程图

编译原理词法分析_java_02

3. 代码如下：

public class CodeSannner {

  private int errorNum = -1; // 出错
  private int keyWordsNum = 1; // 1代表关键字
  private int identifierNum = 2; // 2代表标识符
  private int constantNum = 3; // 3代表常数
  private int operationalCharsNum = 4; // 4代表运算符(不包括+-*/=）
  private int delimiterNum = 5; // 5代表界限符号
  private int addAndSubOp = 6; // 6代表加减运算符
  private int mulAndDivOp = 7; // 7代表乘除运算符
  private int assignOp = 8; // 8代表赋值运算符

  private char ch;
  private int index = 0; // 处理的下标
  private String strToken = ""; // 整个标识符
  private int charsSize = 0; // chars中有效字符的大小
  private char[] chars = null; // 输入文件读取到的字符数组

  // 接下来的String数组列出了目前可以识别的标识符、运算符、界符
  private String[] keyWords = { "abstract", "assert", "boolean", "break", "byte", "case", "catch", "char", "class",
      "const", "continue", "default", "do", "double", "else", "enum", "extends", "final", "finally", "float",
      "for", "goto", "if", "implements", "import", "instanceof", "int", "interface", "long", "native", "new",
      "package", "private", "protected", "public", "return", "strictfp", "short", "static", "super", "switch",
      "synchronized", "this", "throw", "throws", "transient", "try", "void", "volatile", "while" };
  private String[] operationalChars = { "+", "-", "*", "/", "%", "++", "--", ">", "<", "=", "==", "+=", "-=", "*=",
      "/=", "%=" };
  private String[] Delimiter = { "(", ")", ";", ",", "{", "}", "[", "]" };

  public CodeSannner(int charsSize, char[] chars) {
    super();
    this.charsSize = charsSize;
    this.chars = chars;
  }

  // 获取index位置的字符，同时index向后移动一个位置
  private void getChar() {
    if (index < charsSize) {
      ch = chars[index];
      index++;
    }
  }

  // 判断ch是否为空白符
  // 还要有\r否则会出错
  // TODO
  public boolean isBC() {
    return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
  }

  // 跳过空白符，直至ch为一非空白符，\r\n在windows的记事本中为换行
  public void getBlankCharacter() {
    while (index < charsSize && isBC()) {
      getChar();
    }
  }

  // 把当前字符ch连接到strToken
  public void concat() {
    strToken = strToken + ch;
  }

  // 判断当前字符ch是否为字母
  public boolean isLetter() {
    return ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'));
  }

  // 判断字符ch是否为数字
  public boolean isDigit() {
    return (ch >= '0' && ch <= '9');
  }

  // 对于strToken中的字符串查找保留字表，若它是保留字则给出他在保留字表中的下标，否则返回-1
  public int reserve() {
    for (int i = 0; i < keyWords.length; i++) {
      if (strToken.equals(keyWords[i])) {
        return i;
      }
    }
    return -1;
  }

  // 把搜索指针回调一个字符位置，同时更新ch
  public void retract() {
    index--;
    ch = chars[index];
  }

  // 判断字符是否为数字、字母、下划线、$
  public boolean isLetterOrDigitOr$Or_() {
    return (ch == '_' || ch == '$' || isLetter() || isDigit());
  }

  // 通过判断首字符，进入不同的判断程序中
  // 本次实验的核心部分
  public Tuple scan() {

    // 获取首字符
    // 这里调用两个函数的顺序不能互换，否则会出错！！
    // TODO
    getChar();
    getBlankCharacter();

    // 标识符的首字符不能以数字开头
    // TODO
    // 首字符是字母下划线$的，可能是标识符或者关键字
    if (isLetterOr$Or_()) {
      while (isLetterOrDigitOr$Or_()) {
        concat();
        getChar();
      }
      retract();
      int keyWordsIndex = -1;
      // 如果不是保留字
      if ((keyWordsIndex = reserve()) == -1) {
        return new Tuple(identifierNum, strToken);
      }
      // 如果是保留字
      else {
        return new Tuple(keyWordsNum, keyWords[keyWordsIndex]);
      }
    }
    // 首字符是数字
    else if (isDigit()) {
      // 处理整数部分
      while (isDigit()) {
        concat();
        getChar();
      }
      // 处理小数点以及之后的整数部分
      if (ch == '.') {
        concat();
        // 获取小数点后第一位字符
        getChar();
        if (isDigit()) {
          // 如果该字符是数字
          while (isDigit()) {
            concat();
            getChar();
          }
        }
        // 报错
        else {
          concat();
          return new Tuple(errorNum, "ERROR INFO: " + new String(strToken));
        }

      }
      // 判断ch是否为e
      if (ch == 'e' || ch == 'E') {
        concat();
        getChar();
        if (ch == '-' || ch == '+') {
          concat();
          getChar();
        }
        if (isDigit()) {
          while (isDigit()) {
            concat();
            getChar();
          }
        }
        // 报错
        else {
          concat();
          return new Tuple(errorNum, "ERROR INFO: " + new String(strToken));
        }
      }
      // 退掉多处理的字符
      retract();
      // 成功走到常数的末尾
      return new Tuple(constantNum, strToken);
    }
    // 首字符不是数字、字母、下划线、$
    else {
      switch (ch) {
      // 界符 5
      case ',':
        concat();
        return new Tuple(delimiterNum, strToken);
      case ';':
        concat();
        return new Tuple(delimiterNum, strToken);
      case '{':
        concat();
        return new Tuple(delimiterNum, strToken);
      case '}':
        concat();
        return new Tuple(delimiterNum, strToken);
      case '[':
        concat();
        return new Tuple(delimiterNum, strToken);
      case ']':
        concat();
        return new Tuple(delimiterNum, strToken);
      case '(':
        concat();
        return new Tuple(delimiterNum, strToken);
      case ')':
        concat();
        return new Tuple(delimiterNum, strToken);
      // 运算符 4
      case '=':
        concat();
        getChar();
        if (ch == '=') {
          concat();
          return new Tuple(operationalCharsNum, strToken);
        }
        retract();
        // 赋值运算符
        return new Tuple(assignOp, strToken);
      case '+':
        concat();
        getChar();
        if (ch == '+') {
          concat();
          return new Tuple(operationalCharsNum, strToken);
        } else if (ch == '=') {
          concat();
          return new Tuple(operationalCharsNum, strToken);
        }
        retract();
        // 加减法运算符
        return new Tuple(addAndSubOp, strToken);
      case '-':
        concat();
        getChar();
        if (ch == '-') {
          concat();
          return new Tuple(operationalCharsNum, strToken);
        } else if (ch == '=') {
          concat();
          return new Tuple(operationalCharsNum, strToken);
        }
        retract();
        // 加减法运算符
        return new Tuple(addAndSubOp, strToken);
      case '*':
        concat();
        getChar();
        if (ch == '=') {
          concat();
          return new Tuple(operationalCharsNum, strToken);
        }
        retract();
        // 乘除法法运算符
        return new Tuple(mulAndDivOp, strToken);

      case '/':
        concat();
        getChar();
        if (ch == '=') {
          concat();
          return new Tuple(operationalCharsNum, strToken);
        }
        retract();
        // 乘除法法运算符
        return new Tuple(mulAndDivOp, strToken);
      case '%':
        concat();
        getChar();
        if (ch == '=') {
          concat();
          return new Tuple(operationalCharsNum, strToken);
        }
        retract();
        return new Tuple(operationalCharsNum, strToken);
      case '>':
        concat();
        getChar();
        if (ch == '=') {
          concat();
          return new Tuple(operationalCharsNum, strToken);
        }
        retract();
        return new Tuple(operationalCharsNum, strToken);
      case '<':
        concat();
        getChar();
        if (ch == '=') {
          concat();
          return new Tuple(operationalCharsNum, strToken);
        }
        retract();
        return new Tuple(operationalCharsNum, strToken);
      // 出错处理
      default:
        concat();
        return new Tuple(errorNum, "ERROR INFO: " + new String(strToken));
      }
    }
  }

  private boolean isLetterOr$Or_() {
    return isLetter() || ch == '$' || ch == '_';
  }

  public int getIndex() {
    return index;
  }

  public void clear() {
    strToken = "";
  }
}