基于 Java GUI 的文档识别工具制作
某些文本中包含大量的标志(例如标点符号),需要先将它们排除,再统计段落字数。以下是我个人写的一个比较简单的文档识别工具,包含导入文件、导出文件以及一个简单的识别功能。
1、功能实现
以下功能实现纯属于个人想法,可能会有bug,仅供参考
/**
 * Replaces every sentence-punctuation character in a line with a single space so that the
 * line can afterwards be cut into segments with {@code split(" ")}.
 *
 * <p>The previous pattern was an alternation that contained a bare {@code ?}; a quantifier
 * with nothing to quantify makes the regex engine throw {@code PatternSyntaxException}
 * ("Dangling meta character '?'"). Inside a character class {@code ?} is an ordinary literal.
 *
 * <p>Characters replaced: ASCII {@code ; , ? ! :}, their full-width forms
 * (U+FF1B, U+FF0C, U+FF1F, U+FF01, U+FF1A), the ideographic full stop (U+3002), curly double
 * quotes (U+201C, U+201D), the ratio sign (U+2236) and the em dash (U+2014). Non-ASCII
 * characters are written as regex escapes so the pattern does not depend on the encoding of
 * this source file.
 *
 * @param str one line of text; must not be {@code null}
 * @return {@code str} with each listed punctuation character replaced by one space
 */
public static String replaceStr(String str){
    return str.replaceAll(
            "[;,?!:"
                    + "\\uFF1B\\uFF0C\\uFF1F\\uFF01\\uFF1A"
                    + "\\u3002\\u201C\\u201D\\u2236\\u2014]",
            " ");
}
将文本中用于断句的中文标点和英文标点用两个空格来替换(为什么用两个空格呢?因为我的测试文本中可能由于打字太快等个人原因,本身就存在一些单个空格),然后利用split(" ")来将句子切割,代码如下:
// Replace the sentence punctuation in the current line (tempString) with the delimiter.
String new_tempString = replaceStr(tempString);
// Cut the cleaned line into segments; String.split treats its argument as a regex.
// NOTE(review): the article text describes a two-space delimiter, but the literal here is a
// single space - confirm which one is intended (it must match what replaceStr inserts).
String[] strings = new_tempString.split(" ");
再通过识别汉字来统计段落中的字数,代码如下:
// Detects Chinese characters by Unicode range (U+4E00..U+9FCC). This check is not exact:
// many Chinese characters lie outside this range.
// Returns true when str is non-null and contains at least one character in the range.
public static boolean isChineseByRange(String str) {
    final String chineseRun = "[\\u4E00-\\u9FCC]+";
    return str != null
            && Pattern.compile(chineseRun).matcher(str.trim()).find();
}
2、布局设置
在这里,我使用的是GridBagLayout来进行布局,同时使用JPanel和JScrollPane来作为中间容器。
上图是我所设计的图形用户界面,整体代码:
import javax.swing.*;
import javax.swing.filechooser.FileSystemView;
import java.awt.*;
import java.awt.event.*;
import java.io.*;
import java.util.regex.Pattern;
/**
 * A small Swing tool that loads a text file, cuts every line into segments at sentence
 * punctuation, and reports each segment that contains a keyword together with the number of
 * Chinese characters in that segment. The report shown in the text area can be exported to a
 * file.
 */
public class GUIdesign extends JFrame {

    /**
     * Text that replaces each punctuation mark and on which a line is then split.
     * NOTE(review): the accompanying article describes a two-space delimiter; this source
     * contains a single space - confirm which one is intended.
     */
    private static final String SEGMENT_DELIMITER = " ";

    /**
     * Sentence punctuation that separates segments: ASCII {@code ; , ? ! :}, their full-width
     * forms, the ideographic full stop, curly double quotes, the ratio sign and the em dash.
     * A character class is used because a bare {@code ?} in an alternation is a dangling
     * quantifier and makes the pattern fail to compile.
     */
    private static final Pattern PUNCTUATION = Pattern.compile(
            "[;,?!:"
                    + "\\uFF1B\\uFF0C\\uFF1F\\uFF01\\uFF1A"
                    + "\\u3002\\u201C\\u201D\\u2236\\u2014]");

    /** Compiled once; the check runs for every character of every matching segment. */
    private static final Pattern CHINESE = Pattern.compile("[\\u4E00-\\u9FCC]+");

    /**
     * Program entry point: builds and shows the window on the Swing event dispatch thread.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SwingUtilities.invokeLater(GUIdesign::createAndShowGui);
    }

    /** Creates the frame, lays out its components and wires the three button actions. */
    private static void createAndShowGui() {
        GUIdesign frame = new GUIdesign();
        frame.setTitle("文档识别工具1.0");
        frame.setSize(400, 300);
        frame.setLocation(400, 200);
        frame.setLayout(new GridBagLayout());

        JTextField pathField = new JTextField(10);
        JTextField keywordField = new JTextField(10);
        JButton importButton = new JButton("导入");
        JButton recognizeButton = new JButton("识别");
        JButton exportButton = new JButton("导出");

        JPanel controls = new JPanel();
        controls.setLayout(new GridBagLayout());
        // Row 1: "choose a file" label + path text field + import button.
        addRow(controls, "请选择文件", pathField, importButton);
        // Row 2: "enter a keyword" label + keyword text field + recognize button.
        addRow(controls, "请输入关键字", keywordField, recognizeButton);
        // Row 3: export button on a row of its own.
        controls.add(exportButton, constraints(GridBagConstraints.REMAINDER, 1, 3));
        frame.add(controls, constraints(GridBagConstraints.REMAINDER, 1, 2));

        // Result area: vertical scroll bar always shown, horizontal scroll bar never shown.
        JTextArea output = new JTextArea(10, 30);
        JScrollPane scrollPane = new JScrollPane();
        scrollPane.setHorizontalScrollBarPolicy(ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER);
        scrollPane.setVerticalScrollBarPolicy(ScrollPaneConstants.VERTICAL_SCROLLBAR_ALWAYS);
        scrollPane.setViewportView(output);
        frame.add(scrollPane, constraints(GridBagConstraints.REMAINDER, 2, 3));

        importButton.addActionListener(event -> chooseInputFile(frame, pathField, output));
        recognizeButton.addActionListener(
                event -> recognize(frame, pathField.getText(), keywordField.getText(), output));
        exportButton.addActionListener(event -> exportReport(frame, output));

        windowsClose(frame);
        frame.setVisible(true);
    }

    /** Builds constraints with the given cell span and the same weight on both axes. */
    private static GridBagConstraints constraints(int gridwidth, int gridheight, double weight) {
        GridBagConstraints c = new GridBagConstraints();
        c.gridwidth = gridwidth;
        c.gridheight = gridheight;
        c.weightx = weight;
        c.weighty = weight;
        return c;
    }

    /** Adds one "label + text field + button" row; the button ends the grid row. */
    private static void addRow(JPanel panel, String caption, JTextField field, JButton button) {
        panel.add(new JLabel(caption), constraints(1, 1, 3));
        panel.add(field, constraints(1, 1, 3));
        panel.add(button, constraints(GridBagConstraints.REMAINDER, 1, 3));
    }

    /** Import action: clears the report and puts the path of the chosen file into the field. */
    private static void chooseInputFile(Component parent, JTextField pathField, JTextArea output) {
        output.setText("");
        JFileChooser chooser = new JFileChooser();
        // Only regular files can be read by the recognize action.
        chooser.setFileSelectionMode(JFileChooser.FILES_ONLY);
        int choice = chooser.showDialog(parent, "选择");
        File selected = chooser.getSelectedFile();
        // Cancelling or closing the dialog leaves no selection; previously this caused an NPE.
        if (choice != JFileChooser.APPROVE_OPTION || selected == null) {
            return;
        }
        pathField.setText(selected.getAbsolutePath());
    }

    /**
     * Recognize action: reads the file line by line and appends one report line for every
     * segment that contains the keyword.
     */
    private static void recognize(Component parent, String path, String keyWord, JTextArea output) {
        output.setText("");
        File file = new File(path);
        System.out.println(file);
        System.out.println(keyWord);
        // try-with-resources closes the reader on every path, including unchecked exceptions.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String text;
            int line = 1;
            while ((text = reader.readLine()) != null) {
                reportMatches(text, line, keyWord, output);
                line++;
            }
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so a missing file also ends up here.
            e.printStackTrace();
            JOptionPane.showMessageDialog(parent, "读取文件失败: " + e.getMessage());
        }
    }

    /** Reports every segment of one line that contains the keyword. */
    private static void reportMatches(String text, int line, String keyWord, JTextArea output) {
        int count = 0;
        for (String segment : replaceStr(text).split(SEGMENT_DELIMITER)) {
            if (!segment.contains(keyWord)) {
                continue;
            }
            count++;
            String report = "line"+line+",第"+count+"个"+",字数:"+countChinese(segment);
            output.append(report + "\n");
            System.out.println(segment);
            System.out.println(report);
        }
    }

    /** Counts the characters of {@code segment} accepted by {@link #isChineseByRange}. */
    private static int countChinese(String segment) {
        int total = 0;
        for (char c : segment.toCharArray()) {
            if (isChineseByRange(String.valueOf(c))) {
                total++;
            }
        }
        return total;
    }

    /** Export action: writes the text currently shown in the report area to a chosen file. */
    private static void exportReport(Component parent, JTextArea output) {
        JFileChooser fileChooser = new JFileChooser();
        FileSystemView fsv = FileSystemView.getFileSystemView();
        // Start browsing in the directory the file system view reports as "home".
        System.out.println(fsv.getHomeDirectory());
        fileChooser.setCurrentDirectory(fsv.getHomeDirectory());
        fileChooser.setDialogTitle("请选择要上传文件的路径");
        fileChooser.setApproveButtonText("确定");
        fileChooser.setFileSelectionMode(JFileChooser.FILES_ONLY);
        // showOpenDialog is kept on purpose: showSaveDialog switches the dialog type, which
        // resets the custom approve-button text set above.
        if (fileChooser.showOpenDialog(parent) != JFileChooser.APPROVE_OPTION) {
            return;
        }
        String path = fileChooser.getSelectedFile().getPath();
        System.out.println("path: "+path);
        // Previously the writer was closed in a finally block even when its constructor had
        // thrown, which raised an NPE and hid the real error.
        try (PrintWriter writer = new PrintWriter(new File(path))) {
            writer.write(output.getText());
            // PrintWriter never throws on write; checkError() flushes and reports failures.
            if (writer.checkError()) {
                JOptionPane.showMessageDialog(parent, "导出文件失败: " + path);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            JOptionPane.showMessageDialog(parent, "导出文件失败: " + e.getMessage());
        }
    }

    /** Terminates the JVM when the given window is closed. */
    private static void windowsClose(JFrame jFrame){
        jFrame.addWindowListener(new WindowAdapter() {
            @Override
            public void windowClosing(WindowEvent e) {
                System.exit(0);
            }
        });
    }

    /**
     * Replaces every sentence-punctuation character in a line with the segment delimiter so
     * that the line can afterwards be split into segments.
     *
     * @param str one line of text; must not be {@code null}
     * @return {@code str} with each punctuation character replaced by the delimiter
     */
    public static String replaceStr(String str){
        return PUNCTUATION.matcher(str).replaceAll(SEGMENT_DELIMITER);
    }

    /**
     * Detects Chinese characters by Unicode range (U+4E00..U+9FCC). The check is not exact:
     * many Chinese characters lie outside this range.
     *
     * @param str text to inspect, may be {@code null}
     * @return {@code true} if {@code str} contains at least one character in the range;
     *         {@code false} otherwise, including for {@code null}
     */
    public static boolean isChineseByRange(String str) {
        if (str == null) {
            return false;
        }
        return CHINESE.matcher(str.trim()).find();
    }
}
以上是我写该小工具的所有代码,功能也许并不全面,仅为一些初学Java GUI的朋友们提供图形界面设计和功能结合的思路。如有错误,请指正。