import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
/**
 * Line-oriented lexical analyzer for a small teaching language.
 *
 * <p>Every recognized token is printed as {@code (code,text)} and its numeric code is
 * appended to {@link #word}. The codes emitted by this class are:
 *
 * <pre>
 *  0  #        1  begin    2  if       3  then     4  while    5  do       6  end
 * 10  identifier           11  unsigned integer
 * 13  +       14  -       15  *       16  /       17  :       18  :=
 * 20  &lt;       21  &lt;&gt;      22  &lt;=      23  &gt;       24  &gt;=      25  =
 * 26  ;       27  (       28  )
 * </pre>
 *
 * <p>Malformed input is reported on standard output as {@code <line>line: <text> is wrong}
 * and produces no entry in {@link #word}.
 *
 * <p>The scanner keeps its cursor in the static field {@link #p}; the {@code *Check}
 * methods read the token that starts at {@code p} and leave {@code p} on the last
 * character they consumed, so the loop in {@link #analyze(String)} can advance by one.
 */
public class LexicalAnalyzer {
    /** Codes of all tokens recognized so far, in order of appearance. */
    public static ArrayList<Object> word = new ArrayList<>();

    /** Returns the shared list of recognized token codes. */
    public ArrayList<Object> getWord() {
        return word;
    }

    /** Replaces the shared list of recognized token codes. */
    public void setWord(ArrayList<Object> word) {
        LexicalAnalyzer.word = word;
    }

    // Keywords
    static String[] keyWord = {"begin", "if", "then", "while", "do", "end"};
    // Operators
    static String[] operation = {"+", "-", "*", "/", ":", ":=", "<", "<>", "<=", ">", ">=", "="};
    // Delimiters
    static String[] symbol = {";", "(", ")", "#"};

    // Initialized eagerly so analyze() also works when init() was never called.
    static ArrayList<String> keyWords = new ArrayList<>(Arrays.asList(keyWord));
    static ArrayList<String> operations = new ArrayList<>(Arrays.asList(operation));
    static ArrayList<String> symbols = new ArrayList<>(Arrays.asList(symbol));

    /** Numeric code emitted for each fixed token (keywords, operators, delimiters). */
    private static final Map<String, Integer> TOKEN_CODES = new HashMap<>();

    static {
        TOKEN_CODES.put("#", 0);
        TOKEN_CODES.put("begin", 1);
        TOKEN_CODES.put("if", 2);
        TOKEN_CODES.put("then", 3);
        TOKEN_CODES.put("while", 4);
        TOKEN_CODES.put("do", 5);
        TOKEN_CODES.put("end", 6);
        TOKEN_CODES.put("+", 13);
        TOKEN_CODES.put("-", 14);
        TOKEN_CODES.put("*", 15);
        TOKEN_CODES.put("/", 16);
        TOKEN_CODES.put(":", 17);
        TOKEN_CODES.put(":=", 18);
        TOKEN_CODES.put("<", 20);
        TOKEN_CODES.put("<>", 21);
        TOKEN_CODES.put("<=", 22);
        TOKEN_CODES.put(">", 23);
        TOKEN_CODES.put(">=", 24);
        TOKEN_CODES.put("=", 25);
        TOKEN_CODES.put(";", 26);
        TOKEN_CODES.put("(", 27);
        TOKEN_CODES.put(")", 28);
    }

    /** {@code p}: index of the character currently being scanned; {@code lines}: current line number. */
    static int p, lines;

    /**
     * Tokenizes a source file line by line, then prints every collected token code.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyze. When absent,
     *             the original default path is used.
     * @throws FileNotFoundException if the input file cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        init();
        String path = (args != null && args.length > 0) ? args[0] : "E:\\代码\\LexicalAnalyzer.txt";
        File file = new File(path);
        lines = 1;
        try (Scanner input = new Scanner(file)) {
            while (input.hasNextLine()) {
                analyze(input.nextLine());
                lines++;
            }
        }
        for (Object item : word) {
            System.out.println(item);
        }
    }

    /** Rebuilds the keyword, operator and delimiter lists from their backing arrays. */
    public static void init() {
        keyWords = new ArrayList<>();
        operations = new ArrayList<>();
        symbols = new ArrayList<>();
        Collections.addAll(keyWords, keyWord);
        Collections.addAll(operations, operation);
        Collections.addAll(symbols, symbol);
    }

    /**
     * Tokenizes one line of input, dispatching on the first character of each token.
     *
     * @param str the line to scan; {@code null} is treated as an empty line
     */
    public static void analyze(String str) {
        if (str == null) {
            return;
        }
        str = str.trim();
        for (p = 0; p < str.length(); p++) {
            char ch = str.charAt(p);
            if (Character.isWhitespace(ch)) {
                // Skip every kind of whitespace (tabs included), not just ' '.
                continue;
            }
            if (Character.isDigit(ch)) {
                digitCheck(str);
            } else if (Character.isLetter(ch)) {
                letterCheck(str);
            } else {
                symbolCheck(str);
            }
        }
    }

    /**
     * Reads the number that starts at {@code p}.
     *
     * <p>The token extends over the following run of letters and digits. A run made only of
     * digits is emitted with code 11; a run that contains a letter (e.g. {@code 12ab}) is
     * reported as an error and nothing is added to {@link #word}.
     *
     * @param str the line being scanned; {@code p} must be a valid index into it
     */
    public static void digitCheck(String str) {
        StringBuilder token = new StringBuilder().append(str.charAt(p++));
        boolean err = false;
        while (p < str.length() && Character.isLetterOrDigit(str.charAt(p))) {
            char ch = str.charAt(p++);
            token.append(ch);
            if (Character.isLetter(ch)) {
                err = true;
            }
        }
        if (err) {
            reportError(token.toString());
        } else {
            emit(11, token.toString());
        }
        // p is one past the token; step back so the caller's p++ lands on the next character.
        p--;
    }

    /**
     * Reads the identifier or keyword that starts at {@code p}.
     *
     * <p>The token extends over the following run of letters and digits. Keywords are emitted
     * with their own code (1-6); anything else is emitted as an identifier with code 10.
     *
     * @param str the line being scanned; {@code p} must be a valid index into it
     */
    public static void letterCheck(String str) {
        StringBuilder buf = new StringBuilder().append(str.charAt(p++));
        while (p < str.length() && Character.isLetterOrDigit(str.charAt(p))) {
            buf.append(str.charAt(p++));
        }
        String token = buf.toString();
        if (keyWords.contains(token)) {
            emitKnown(token);
        } else {
            emit(10, token);
        }
        // p is one past the token; step back so the caller's p++ lands on the next character.
        p--;
    }

    /**
     * Reads the delimiter or operator that starts at {@code p}.
     *
     * <p>Two-character operators ({@code :=}, {@code <>}, {@code <=}, {@code >=}) are preferred
     * over their one-character prefix. A character that is neither a delimiter nor an
     * operator is reported as an error; the scan then continues with the next character.
     *
     * @param str the line being scanned; {@code p} must be a valid index into it
     */
    public static void symbolCheck(String str) {
        String token = String.valueOf(str.charAt(p));
        if (symbols.contains(token)) {
            emitKnown(token);
            return;
        }
        if (!operations.contains(token)) {
            reportError(token);
            return;
        }
        // Look ahead only when a next character exists; an operator may end the line.
        if (p + 1 < str.length()) {
            String pair = token + str.charAt(p + 1);
            if (operations.contains(pair)) {
                p++; // consume the second character of the operator
                emitKnown(pair);
                return;
            }
        }
        emitKnown(token);
    }

    /**
     * Reads the string literal whose opening quote is at {@code p}.
     *
     * <p>A literal that is closed by a second {@code "} on the same line is emitted with
     * code 6; an unterminated literal is reported as an error. On return {@code p} is on the
     * closing quote, or at {@code str.length()} when the literal is unterminated.
     *
     * <p>NOTE: this method is not invoked by {@link #analyze(String)}, and code 6 is also the
     * code emitted for the keyword {@code end}.
     *
     * @param str the line being scanned; {@code p} must be a valid index into it
     */
    public static void stringCheck(String str) {
        StringBuilder token = new StringBuilder().append(str.charAt(p++));
        boolean closed = false;
        for (; p < str.length(); p++) {
            char ch = str.charAt(p);
            token.append(ch);
            if (ch == '"') {
                closed = true;
                break;
            }
        }
        // Track the closing quote explicitly: a lone opening quote must not count as closed.
        if (closed) {
            emit(6, token.toString());
        } else {
            reportError(token.toString());
        }
    }

    /** Emits a keyword, operator or delimiter using its entry in the code table, if any. */
    private static void emitKnown(String token) {
        Integer code = TOKEN_CODES.get(token);
        if (code != null) {
            emit(code, token);
        }
    }

    /** Prints {@code (code,token)} and records the code in {@link #word}. */
    private static void emit(int code, String token) {
        System.out.println("(" + code + "," + token + ")");
        word.add(code);
    }

    /** Prints the error line for a malformed token on the current input line. */
    private static void reportError(String token) {
        System.out.println(lines + "line" + ": " + token + " is wrong");
    }
}
// If you need syntax analysis, see the companion syntax analyzer.