gpt4 book ai didi

java - 使用多线程从mysql表读取时如何限制记录数

转载 作者:行者123 更新时间:2023-11-28 23:33:45 25 4
gpt4 key购买 nike

我的 mysql 表中有 150 万条记录。我想分批读取所有记录,即每批读取 1000 条记录并在控制台中打印这些记录。

为此,我计划使用 java 实现多线程概念。我该如何实现?

最佳答案

在 MySQL 中,您可以一次获取所有记录,或者以流式传输方式一条一条地获取它们(参见 this answer )。或者,您可以使用 limit 关键字进行分块(参见 this answer )。

无论您使用流式结果还是分块,您都可以在读取数据的同时使用多线程处理(或打印)数据。这通常是使用生产者-消费者模式完成的,在这种情况下,生产者从数据库中检索数据,将其放入队列中,消费者从队列中获取数据并进行处理(例如打印到控制台)。

不过这会带来一些管理开销:生产者和消费者都可能卡住或因出错而中断,双方都需要能察觉到这种情况,以免自己永远挂起(进而卡住您的整个应用程序)。这就是“合理”超时的用武之地(多长才算“合理”完全取决于您的具体情况)。

我尝试把它写成一个最小的可运行示例,但代码量仍然不少(见下文)。其中有两行被注释掉的代码可用于测试超时情况。另外还有一个 refreshTestData 变量:将其设为 false 即可重用之前已插入的记录(插入记录可能需要很长时间)。
为了保持简洁,省略了很多关键字,如private/public(即这些需要在非演示代码中添加)。

import java.sql.*;
import java.util.*;
import java.util.concurrent.*;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Demo of reading a MySQL table with a streaming result set while a second thread
 * processes ("prints") the rows in batches.
 *
 * <p>The calling thread is the producer: it reads rows, groups them into lists of
 * {@code batchSize} records and offers them to a bounded queue. A {@link PrintToConsole}
 * task running on its own thread is the consumer. An empty list placed on the queue is the
 * end-of-data marker. Both sides use timeouts so that neither waits forever when the other
 * side is stuck or has failed.
 */
public class FetchRows {

    private static final Logger log = LoggerFactory.getLogger(FetchRows.class);

    /** Number of insert statements sent to the server per JDBC batch. */
    private static final int INSERT_BATCH_SIZE = 500;

    public static void main(String[] args) {

        try {
            new FetchRows().print();
        } catch (Exception e) {
            // Use the logger instead of printStackTrace() so the failure ends up in the same log.
            log.error("Fetching and printing records failed.", e);
        }
    }

    /**
     * Connects to the local test database, prepares the test table and then streams all
     * of its rows to the print task.
     *
     * @throws Exception if the driver cannot be loaded, a database call fails, or the
     *         thread is interrupted while waiting for the print task
     */
    void print() throws Exception {

        // Loading the class is enough to register the driver; no instance is needed.
        Class.forName("com.mysql.jdbc.Driver");
        Properties dbProps = new Properties();
        dbProps.setProperty("user", "test");
        dbProps.setProperty("password", "test");

        try (Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", dbProps)) {
            try (Statement st = conn.createStatement()) {
                prepareTestData(st);
            }
            // Forward-only, read-only statement with fetch size Integer.MIN_VALUE asks
            // Connector/J to stream rows instead of loading the complete result in memory.
            // https://stackoverflow.com/a/2448019/3080094
            try (Statement st = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY,
                    ResultSet.CONCUR_READ_ONLY)) {
                st.setFetchSize(Integer.MIN_VALUE);
                fetchAndPrintTestData(st);
            }
        }
    }

    /** When {@code true} the test table is dropped and refilled, else existing rows are reused. */
    boolean refreshTestData = true;
    /** Number of rows inserted when {@link #refreshTestData} is {@code true}. */
    int maxRecords = 5_555;

    /**
     * Recreates and fills the {@code fetchrecords} table, or counts the rows already in it
     * when {@link #refreshTestData} is {@code false}.
     *
     * @param st statement used to execute the DDL, inserts and count query
     * @throws SQLException if any statement fails
     */
    void prepareTestData(Statement st) throws SQLException {

        int recordCount = 0;
        if (refreshTestData) {
            st.execute("drop table if exists fetchrecords");
            st.execute("create table fetchrecords (id mediumint not null auto_increment primary key, created timestamp default current_timestamp)");
            for (int i = 0; i < maxRecords; i++) {
                st.addBatch("insert into fetchrecords () values ()");
                int inserted = i + 1;
                // Flush after every full batch; testing (i % 500 == 0) would flush a
                // one-statement batch at i == 0 and log a count that is one too low.
                if (inserted % INSERT_BATCH_SIZE == 0) {
                    st.executeBatch();
                    log.debug("{} records available.", inserted);
                }
            }
            // Send whatever is left from the last, partially filled batch.
            st.executeBatch();
            recordCount = maxRecords;
        } else {
            try (ResultSet rs = st.executeQuery("select count(*) from fetchrecords")) {
                if (rs.next()) {
                    recordCount = rs.getInt(1);
                }
            }
        }
        log.info("{} records available for testing.", recordCount);
    }

    /** Number of records handed to the print task per queue entry. */
    int batchSize = 1_000;
    /** Capacity of the print queue, i.e. the maximum number of batches waiting in memory. */
    int maxBatchesInMem = 3;
    /** Seconds to wait for the print task to finish after reading has ended. */
    int printFinishTimeoutS = 5;

    /**
     * Reads all rows from {@code fetchrecords} and passes them in batches to a
     * {@link PrintToConsole} task running on a separate thread.
     *
     * <p>After reading ends (normally or not) an empty list is queued as end-of-data marker,
     * unless the print task is already stopping, and the method waits up to
     * {@link #printFinishTimeoutS} seconds for the print task to finish.
     *
     * @param st statement used to run the select query
     * @throws SQLException if reading from the database fails
     * @throws InterruptedException if interrupted while waiting for the print task to finish
     */
    void fetchAndPrintTestData(Statement st) throws SQLException, InterruptedException {

        final BlockingQueue<List<FetchRecordBean>> printQueue = new LinkedBlockingQueue<>(maxBatchesInMem);
        final PrintToConsole printTask = new PrintToConsole(printQueue);
        new Thread(printTask, "print-to-console").start();
        // Remember an interrupt received while producing, so the flag can be restored
        // after the print task has been given the chance to finish.
        boolean interrupted = false;
        try (ResultSet rs = st.executeQuery("select * from fetchrecords")) {
            List<FetchRecordBean> batch = newBatch();
            while (rs.next()) {
                batch.add(readRecord(rs));
                if (batch.size() >= batchSize) {
                    /*
                     * The printTask can stop itself when this producer is too slow to put records on the print-queue.
                     * Therefore, also check printTask.isStopping() to break the while-loop.
                     */
                    if (printTask.isStopping()) {
                        throw new TimeoutException("Print task has stopped.");
                    }
                    enqueue(printQueue, batch);
                    batch = newBatch();
                }
            }
            if (!batch.isEmpty()) {
                enqueue(printQueue, batch);
            }
        } catch (TimeoutException e) {
            log.error("Unable to finish printing records to console: {}", e.getMessage());
            printTask.stop();
        } catch (InterruptedException e) {
            interrupted = true;
            log.error("Unable to finish printing records to console: {}", e.getMessage());
            printTask.stop();
        } finally {
            log.info("Reading records finished.");
            if (!printTask.isStopping()) {
                try {
                    // An empty list tells the print task that no more data will follow.
                    enqueue(printQueue, Collections.<FetchRecordBean> emptyList());
                } catch (InterruptedException e) {
                    interrupted = true;
                    log.error("Unable to signal last record to print.", e);
                    printTask.stop();
                } catch (Exception e) {
                    log.error("Unable to signal last record to print.", e);
                    printTask.stop();
                }
            }
            if (!printTask.await(printFinishTimeoutS, TimeUnit.SECONDS)) {
                log.error("Print to console task did not finish.");
            }
            if (interrupted) {
                // Restore the interrupt flag that was cleared when InterruptedException was thrown.
                Thread.currentThread().interrupt();
            }
        }
    }

    /** Creates an empty batch list presized for {@link #batchSize} records. */
    private List<FetchRecordBean> newBatch() {
        return new ArrayList<>(Math.max(1, batchSize));
    }

    /** Copies the current row of {@code rs} into a new bean; a NULL {@code created} stays {@code null}. */
    private FetchRecordBean readRecord(ResultSet rs) throws SQLException {
        FetchRecordBean bean = new FetchRecordBean();
        bean.setId(rs.getInt("id"));
        Timestamp created = rs.getTimestamp("created");
        bean.setCreated(created == null ? null : new java.util.Date(created.getTime()));
        return bean;
    }

    /** Seconds the producer waits for free space on the print queue. */
    int enqueueTimeoutS = 5;
    // To test a slow printer, see also Thread.sleep statement in PrintToConsole.print.
    // int enqueueTimeoutS = 1;

    /**
     * Offers a batch to the print queue, waiting at most {@link #enqueueTimeoutS} seconds
     * for space to become available.
     *
     * @param printQueue queue read by the print task
     * @param l batch to add; an empty list marks the end of the data
     * @throws InterruptedException if interrupted while waiting for queue space
     * @throws TimeoutException if the batch could not be queued within the timeout
     */
    void enqueue(BlockingQueue<List<FetchRecordBean>> printQueue, List<FetchRecordBean> l) throws InterruptedException, TimeoutException {

        log.debug("Adding {} records to print-queue.", l.size());
        if (!printQueue.offer(l, enqueueTimeoutS, TimeUnit.SECONDS)) {
            throw new TimeoutException("Unable to put print data on queue within " + enqueueTimeoutS + " seconds.");
        }
    }

    /** Seconds the print task waits for the next batch before giving up. */
    int dequeueTimeoutS = 5;

    /**
     * Consumer task: takes batches from the queue and prints them until it receives an
     * empty list, is stopped, or no batch arrives within {@link #dequeueTimeoutS} seconds.
     */
    class PrintToConsole implements Runnable {

        private final BlockingQueue<List<FetchRecordBean>> q;
        /** Released once {@link #run()} has ended, whatever the reason. */
        private final CountDownLatch finishedLock = new CountDownLatch(1);
        /** Set by {@link #stop()} or by {@link #run()} itself when it ends. */
        private volatile boolean stop;

        public PrintToConsole(BlockingQueue<List<FetchRecordBean>> q) {
            this.q = q;
        }

        @Override
        public void run() {

            try {
                while (!stop) {
                    List<FetchRecordBean> batch = q.poll(dequeueTimeoutS, TimeUnit.SECONDS);
                    if (batch == null) {
                        log.error("Unable to get print data from queue within {} seconds.", dequeueTimeoutS);
                        break;
                    }
                    if (batch.isEmpty()) {
                        // End-of-data marker from the producer.
                        break;
                    }
                    print(batch);
                }
                if (stop) {
                    log.error("Printing to console was stopped.");
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                log.error("Unable to print records to console.", e);
            } catch (Exception e) {
                log.error("Unable to print records to console.", e);
            } finally {
                if (!stop) {
                    // Mark as stopping so the producer sees that this task no longer consumes.
                    stop = true;
                    log.info("Printing to console finished.");
                }
                finishedLock.countDown();
            }
        }

        /** Handles one batch; this demo only logs the number of records received. */
        void print(List<FetchRecordBean> l) {

            log.info("Got list with {} records from print-queue.", l.size());
            // To test a slow printer, see also enqueueTimeoutS.
            // try { Thread.sleep(1500L); } catch (Exception ignored) {}
        }

        /** Requests the task to stop; checked before each poll of the queue. */
        public void stop() {
            stop = true;
        }

        /** Returns {@code true} once a stop was requested or the task has ended. */
        public boolean isStopping() {
            return stop;
        }

        /** Blocks until {@link #run()} has ended. */
        public void await() throws InterruptedException {
            finishedLock.await();
        }

        /**
         * Waits for {@link #run()} to end.
         *
         * @return {@code true} if the task ended within the given time
         */
        public boolean await(long timeout, TimeUnit tunit) throws InterruptedException {
            return finishedLock.await(timeout, tunit);
        }

    }

    /** Plain bean holding one row of the {@code fetchrecords} table. */
    class FetchRecordBean {

        private int id;
        private java.util.Date created;

        public int getId() {
            return id;
        }

        public void setId(int id) {
            this.id = id;
        }

        public java.util.Date getCreated() {
            return created;
        }

        public void setCreated(java.util.Date created) {
            this.created = created;
        }

    }
}

依赖关系:

  • mysql:mysql-connector-java:5.1.38
  • org.slf4j:slf4j-api:1.7.20(并在控制台中显示日志记录:ch.qos.logback:logback-classic:1.1.7 和 ch.qos.logback:logback-core:1.1.7 )

关于java - 使用多线程从mysql表读取时如何限制记录数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/36443790/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com