读取本地文件,每行为一条记录,文件大小550M,200万条数据。

先将文件读取的内存中,再开启6个线程连接postgresql不同coordinator端口导入数据。

代码如下:

package com.scistor.datavision.operator.common;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

public class InsertThread implements Runnable {
    private String tb;
    private String ip;
    private String port;
    private List<String> list;

    public void setPramater(String tb, String ip, String port, List<String> list) {
        this.tb = tb;
        this.ip = ip;
        this.port = port;
        this.list = list;
    }

    public void run() {
        PreparedStatement ps = null;
        String sql = null;
        Connection conn = null;
        try {
            Class.forName("org.postgresql.Driver");
            String url = "jdbc:postgresql://192.168.8." + ip + ":" + port + "/postgres";
            try {
                conn = DriverManager.getConnection(url, "postgres", "postgres");
            } catch (SQLException e) {
                e.printStackTrace();
            }
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        int count = 0;
        try {
            conn.setAutoCommit(false);
            sql = "insert into "
                    + tb
                    + " values(?,?,?,?,?,?,?::timestamptz,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
            ps = conn.prepareStatement(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        Long beginTime = System.currentTimeMillis();
        Long begin = System.currentTimeMillis();

        //设置批次大小,按批导入数据
        for (int i = 0; i < list.size(); i++) {
            String[] con = list.get(i).split(",", -1);
            if (con.length != 38) {
                //过滤不符合条件数据
                continue;
            }
            count++;
            try {
                for (int j = 0; j < con.length; j++) {
                    if (con[j] == null) {
                        ps.setString(j + 1, "NULL");
                    } else {
                        ps.setString(j + 1, con[j].trim());
                    }
                }
                ps.addBatch();
                // 批次大小为10000,此处已写死
                if (count > 0 && count % 10000 == 0) {
                    ps.executeBatch();
                    conn.commit();
                    ps.clearBatch();
                    Long midTime = System.currentTimeMillis();
                    //打印数据导入性能
                    System.out.println("-----------------" + count);
                    System.out.println("导入1万条数据性能" + (10000 * 1000)
                            / (midTime - begin));
                    begin = midTime;
                }
            } catch (SQLException e) {
                e.printStackTrace();
                continue;
            }
        }
        try {
            ps.executeBatch();
            conn.commit();
            ps.clearBatch();
        } catch (SQLException e) {
            e.printStackTrace();
        }

        long sum = 1000 * count;
        long endTime = System.currentTimeMillis();
        System.out.println("pst+batch:" + count + "条");
        System.out.println("pst+batch:" + (endTime - beginTime) / 1000 + "秒");
        System.out.println("pst+batch:" + sum / (endTime - beginTime) + "条/秒");

        //关闭数据库连接
        if (ps != null) {
            try {
                ps.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }

    //读取文件内容
    public static List<String> getContent(String file) throws Exception {
        BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream(file), "UTF-8"));
        String line = br.readLine();
        List<String> list = new ArrayList<String>();
        while (line != null) {
            list.add(line);
            line = br.readLine();
        }
        br.close();
        return list;
    }

    public static void main(String[] args) {
        String file = args[0];
        String tb = args[1];
        String ip = args[2];
        String[] port = {"2341", "2342", "2343", "2344", "2345", "2346"};
        List<String> list = null;
        try {
            // 获得源数据
            list = getContent(file);
            System.out.println(list.size());
        } catch (Exception e) {
            e.printStackTrace();
        }
        //线程1
        InsertThread myThread1 = new InsertThread();
        myThread1.setPramater(tb, ip, port[0], list);
        Thread thread1 = new Thread(myThread1);
        //线程2
        InsertThread myThread2 = new InsertThread();
        myThread2.setPramater(tb, ip, port[1], list);
        Thread thread2 = new Thread(myThread2);
        //线程3
        InsertThread myThread3 = new InsertThread();
        myThread3.setPramater(tb, ip, port[2], list);
        Thread thread3 = new Thread(myThread3);
        //线程4
        InsertThread myThread4 = new InsertThread();
        myThread4.setPramater(tb, ip, port[3], list);
        Thread thread4 = new Thread(myThread4);
        //线程5
        InsertThread myThread5 = new InsertThread();
        myThread5.setPramater(tb, ip, port[4], list);
        Thread thread5 = new Thread(myThread5);
        //线程6
        InsertThread myThread6 = new InsertThread();
        myThread6.setPramater(tb, ip, port[5], list);
        Thread thread6 = new Thread(myThread6);

        //开启线程
        thread1.start();
        thread2.start();
        thread3.start();
        thread4.start();
        thread5.start();
        thread6.start();
    }
}