java实现大文件导出的实现与优化
作者:淡抹心痕
关于大文件导出的优化迭代情况如下:
计算机配置:四核16G内存
初始版本为单线程单文件导出文件,mybatis读 opencsv写,耗时将近三小时;
第一轮优化改为多线程单文件,提高读数据效率,时间仅缩减十分钟;
第二轮改为多线程多文件,提高写文件效率,时间缩减一个半小时;
第三轮使用 Mybatis 流式查询,并改用 Map 封装数据,提高内存利用率,时间缩减十分钟;
第四轮弃用 Mybatis ,改用原生 JDBC 获取数据并直接拼接,时间缩减十分钟;
第五轮弃用 opencsv ,改用 BufferedWriter 直接写数据,时间缩减十分钟;
输出:
2023-04-23 22:01:30 [main] INFO WriteData - 单线程单文件 total time in 258s
2023-04-23 22:02:44 [main] INFO WriteData - 固定线程单文件 total time in 74s
2023-04-23 22:03:40 [main] INFO WriteData - 固定线程多文件 total time in 55s
2023-04-23 22:04:18 [main] INFO WriteData - concurrentWrite total time in 37s
2023-04-23 22:26:28 [Thread-1] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-3] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-4] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-6] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-7] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-2] INFO WriteData - query in 42s
2023-04-23 22:26:28 [Thread-5] INFO WriteData - query in 42s
2023-04-23 22:26:30 [Thread-0] INFO WriteData - query in 44s
2023-04-23 22:27:00 [Thread-5] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-1] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-7] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-2] INFO WriteData - write in 31s
2023-04-23 22:27:00 [Thread-3] INFO WriteData - write in 32s
2023-04-23 22:27:00 [Thread-6] INFO WriteData - write in 32s
2023-04-23 22:27:00 [Thread-4] INFO WriteData - write in 32s
2023-04-23 22:27:01 [Thread-0] INFO WriteData - write in 31s
2023-04-23 22:27:01 [main] INFO WriteData - 固定线程单文件 total time in 75s
2023-04-23 22:27:24 [Thread-14] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-13] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-12] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-9] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-11] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-10] INFO WriteData - query in 22s
2023-04-23 22:27:24 [Thread-15] INFO WriteData - query in 22s
2023-04-23 22:27:25 [Thread-8] INFO WriteData - query in 23s
2023-04-23 22:27:55 [Thread-12] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-14] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-9] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-11] INFO WriteData - write in 31s
2023-04-23 22:27:55 [Thread-13] INFO WriteData - write in 31s
2023-04-23 22:27:56 [Thread-15] INFO WriteData - write in 31s
2023-04-23 22:27:56 [Thread-10] INFO WriteData - write in 31s
2023-04-23 22:27:56 [Thread-8] INFO WriteData - write in 31s
2023-04-23 22:27:56 [main] INFO WriteData - 固定线程多文件 total time in 54s
示例代码
import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariDataSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.nio.file.Files; import java.nio.file.Paths; import java.sql.*; import java.time.Duration; import java.time.LocalDate; import java.util.concurrent.CompletableFuture; import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; public class WriteData { static final Logger log = LoggerFactory.getLogger(WriteData.class); public static final String PARENT_PATH = "C:\\Users\\qiu01\\Desktop\\server\\docker\\mysql\\master\\data\\stu_data\\"; public static final String URL = "jdbc:mysql://localhost:3307/stu?allowPublicKeyRetrieval=TRUE&useCursorFetch=true"; public static final String USERNAME = "root"; public static final String PASSWORD = "123456"; public static final String SQL = "SELECT * FROM student WHERE id > ? AND id <= ?"; public static final int TOTAL = 10000000; public static final ThreadPoolExecutor POOL = new ThreadPoolExecutor(8, 9, 3, TimeUnit.SECONDS, new LinkedBlockingDeque<>()); public static final HikariDataSource DS; static { HikariConfig config = new HikariConfig(); config.setJdbcUrl(URL); config.setUsername(USERNAME); config.setPassword(PASSWORD); DS = new HikariDataSource(config); } public static void main(String[] args) { // 单线程写文件 singleThreadWrite(); // 固定线程写同 concurrentWriteWithFixedThread(true); concurrentWriteWithFixedThread(false); concurrentWrite(); } public static void singleThreadWrite() { String file = PARENT_PATH + "file.csv"; long start = System.currentTimeMillis(); try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(Paths.get(file)))); Connection connection = DS.getConnection(); PreparedStatement stmt = connection.prepareStatement(SQL,ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);) { stmt.setFetchSize(10000); 
stmt.setFetchDirection(ResultSet.FETCH_REVERSE); stmt.setInt(1, 0); stmt.setInt(2, 10000000); ResultSet rs = stmt.executeQuery(); writeToFile(writer, rs); } catch (SQLException | IOException e) { throw new RuntimeException(e); } log.info("单线程单文件 total time in {}s", getSeconds(start)); emptyFolder(); } private static void concurrentWriteWithFixedThread(boolean writeInOneFile) { int batch_size = 1250000; Thread[] threads = new Thread[TOTAL/batch_size]; long start = System.currentTimeMillis(); for (int i = 0; i < TOTAL; i = i + batch_size) { final int j = i; int no = i / batch_size; Thread t = new Thread(() -> { String file; if (writeInOneFile) { file = PARENT_PATH + "file.csv"; } else { file = PARENT_PATH + "file_" + no + ".csv"; } try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, true))); Connection connection = DS.getConnection(); PreparedStatement stmt = connection.prepareStatement(SQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); ) { stmt.setFetchSize(10000); stmt.setFetchDirection(ResultSet.FETCH_REVERSE); stmt.setInt(1, j); stmt.setInt(2, j + batch_size); long queryStart = System.currentTimeMillis(); try (ResultSet rs = stmt.executeQuery()) { log.info("query in {}s", getSeconds(queryStart)); long writeStart = System.currentTimeMillis(); writeToFile(writer, rs); log.info("write in {}s", getSeconds(writeStart)); } } catch (SQLException | IOException e) { throw new RuntimeException(e); } }); t.start(); threads[no] = t; } for (Thread t : threads) { try { t.join(); } catch (InterruptedException e) { throw new RuntimeException(e); } } if (writeInOneFile) { log.info("固定线程单文件 total time in {}s", getSeconds(start)); } else { log.info("固定线程多文件 total time in {}s", getSeconds(start)); } // emptyFolder(); } private static void concurrentWrite() { int batch_size = 10000; CompletableFuture<Void>[] futures = new CompletableFuture[TOTAL/batch_size]; long start = System.currentTimeMillis(); for (int i = 0; i < TOTAL; i 
= i + batch_size) { final int j = i; int no = i / batch_size; CompletableFuture<Void> t = CompletableFuture.runAsync(() -> { String file = PARENT_PATH + "file_" + no + ".csv"; try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(Files.newOutputStream(Paths.get(file)))); Connection connection = DS.getConnection(); PreparedStatement stmt = connection.prepareStatement(SQL,ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); ) { stmt.setInt(1, j); stmt.setInt(2, j + batch_size); try (ResultSet rs = stmt.executeQuery()){ writeToFile(writer, rs); } } catch (SQLException | IOException e) { throw new RuntimeException(e); } },POOL); futures[no] = t; } CompletableFuture.allOf(futures).join(); log.info("多线程多文件 total time in {}s", getSeconds(start)); POOL.shutdown(); emptyFolder(); } private static void emptyFolder() { File file = new File(PARENT_PATH); File[] files = file.listFiles(); for (File f : files) { f.delete(); } } private static void writeToFile(BufferedWriter writer, ResultSet rs) throws SQLException, IOException { StringBuilder builder = new StringBuilder(); while (rs.next()) { String firstName = rs.getString("first_name"); String lastName = rs.getString("last_name"); LocalDate dob = rs.getDate("date_of_birth").toLocalDate(); String gender = rs.getString("gender"); String email = rs.getString("email"); String phone = rs.getString("phone_number"); String address = rs.getString("address"); String city = rs.getString("city"); String state = rs.getString("state"); String zip = rs.getString("zip_code"); String country = rs.getString("country"); String nationality = rs.getString("nationality"); String religion = rs.getString("religion"); String emergencyContactName = rs.getString("emergency_contact_name"); String emergencyContactPhone = rs.getString("emergency_contact_phone_number"); String guardianName = rs.getString("guardian_name"); String guardianPhone = rs.getString("guardian_phone_number"); String highSchoolName = 
rs.getString("high_school_name"); double highSchoolGpa = rs.getDouble("high_school_gpa"); int highSchoolGradYear = rs.getInt("high_school_graduation_year"); String major = rs.getString("major"); String degreeLevel = rs.getString("degree_level"); String enrollmentStatus = rs.getString("enrollment_status"); builder.append(firstName).append("|"); builder.append(lastName).append("|"); builder.append(dob).append("|"); builder.append(gender).append("|"); builder.append(email).append("|"); builder.append(phone).append("|"); builder.append(address).append("|"); builder.append(city).append("|"); builder.append(state).append("|"); builder.append(zip).append("|"); builder.append(country).append("|"); builder.append(nationality).append("|"); builder.append(religion).append("|"); builder.append(emergencyContactName).append("|"); builder.append(emergencyContactPhone).append("|"); builder.append(guardianName).append("|"); builder.append(guardianPhone).append("|"); builder.append(highSchoolName).append("|"); builder.append(highSchoolGpa).append("|"); builder.append(highSchoolGradYear).append("|"); builder.append(major).append("|"); builder.append(degreeLevel).append("|"); builder.append(enrollmentStatus).append("\n"); writer.write(builder.toString()); builder.delete(0, builder.length()); } } private static long getSeconds(long start) { return Duration.ofMillis(System.currentTimeMillis() - start).getSeconds(); } }
以上就是java实现大文件导出的实现与优化的详细内容,更多关于java文件导出的资料请关注脚本之家其它相关文章!