gpt4 book ai didi

java - 需要在 5 秒内使用 hibernate 在 mysql 中插入 100000 行

转载 作者:IT老高 更新时间:2023-10-29 00:12:00 27 4
gpt4 key购买 nike

我正在尝试使用 Hibernate(JPA) 在 5 秒内在 MYSQL 表中插入 100,000 行。我已经尝试了hibernate提供的每一个技巧,但仍然不能超过35秒。

第一个优化:我从 IDENTITY 序列生成器开始,插入需要 60 秒。后来我放弃了序列生成器并开始分配 @Id通过阅读 MAX(id)并使用 AtomicInteger.incrementAndGet()自己分配字段。这将插入时间减少到 35 秒。

第二次优化:我通过添加启用批量插入

<prop key="hibernate.jdbc.batch_size">30</prop>
<prop key="hibernate.order_inserts">true</prop>
<prop key="hibernate.current_session_context_class">thread</prop>
<prop key="hibernate.jdbc.batch_versioned_data">true</prop>

到配置。我震惊地发现批量插入完全没有减少插入时间。还是 35 秒!

现在,我正在考虑尝试使用多个线程进行插入。有人有任何指示吗?我应该选择 MongoDB 吗?

以下是我的配置:1. hibernate 配置`

<bean id="entityManagerFactoryBean" class="org.springframework.orm.jpa.LocalContainerEntityManagerFactoryBean">
<property name="dataSource" ref="dataSource" />
<property name="packagesToScan" value="com.progresssoft.manishkr" />
<property name="jpaVendorAdapter">
<bean class="org.springframework.orm.jpa.vendor.HibernateJpaVendorAdapter" />
</property>
<property name="jpaProperties">
<props>
<prop key="hibernate.hbm2ddl.auto">${hibernate.hbm2ddl.auto}</prop>
<prop key="hibernate.dialect">${hibernate.dialect}</prop>
<prop key="hibernate.show_sql">${hibernate.show_sql}</prop>
<prop key="hibernate.format_sql">${hibernate.format_sql}</prop>
<prop key="hibernate.jdbc.batch_size">30</prop>
<prop key="hibernate.order_inserts">true</prop>
<prop key="hibernate.current_session_context_class">thread</prop>
<prop key="hibernate.jdbc.batch_versioned_data">true</prop>
</props>
</property>
</bean>

<bean class="org.springframework.jdbc.datasource.DriverManagerDataSource"
id="dataSource">
<property name="driverClassName" value="${database.driver}"></property>
<property name="url" value="${database.url}"></property>
<property name="username" value="${database.username}"></property>
<property name="password" value="${database.password}"></property>
</bean>

<bean id="transactionManager" class="org.springframework.orm.jpa.JpaTransactionManager">
<property name="entityManagerFactory" ref="entityManagerFactoryBean" />
</bean>



<tx:annotation-driven transaction-manager="transactionManager" />

`

  1. 实体配置:

`

@Entity
@Table(name = "myEntity")
public class MyEntity {

@Id
private Integer id;

@Column(name = "deal_id")
private String dealId;

....
....

@Temporal(TemporalType.TIMESTAMP)
@Column(name = "timestamp")
private Date timestamp;

@Column(name = "amount")
private BigDecimal amount;

@OneToOne(cascade = CascadeType.ALL)
@JoinColumn(name = "source_file")
private MyFile sourceFile;

public Deal(Integer id,String dealId, ....., Timestamp timestamp, BigDecimal amount, SourceFile sourceFile) {
this.id = id;
this.dealId = dealId;
...
...
...
this.amount = amount;
this.sourceFile = sourceFile;
}


public String getDealId() {
return dealId;
}

public void setDealId(String dealId) {
this.dealId = dealId;
}

...

...


....

public BigDecimal getAmount() {
return amount;
}

public void setAmount(BigDecimal amount) {
this.amount = amount;
}

....


public Integer getId() {
return id;
}

public void setId(Integer id) {
this.id = id;
}

`

  1. 持久化代码(服务):

`

@Service
@Transactional
public class ServiceImpl implements MyService{

@Autowired
private MyDao dao;
....

`void foo(){
for(MyObject d : listOfObjects_100000){
dao.persist(d);
}
}

`4.道类:

`

@Repository
public class DaoImpl implements MyDao{

@PersistenceContext
private EntityManager em;

public void persist(Deal deal){
em.persist(deal);
}
}

`

日志:`

DEBUG o.h.e.j.b.internal.AbstractBatchImpl - Reusing batch statement
18:26:32.906 [http-nio-8080-exec-2] DEBUG org.hibernate.SQL - insert into deal (amount, deal_id, timestamp, from_currency, source_file, to_currency, id) values (?, ?, ?, ?, ?, ?, ?)
18:26:32.906 [http-nio-8080-exec-2] DEBUG o.h.e.j.b.internal.AbstractBatchImpl - Reusing batch statement
18:26:32.906 [http-nio-8080-exec-2] DEBUG org.hibernate.SQL - insert into deal (amount, deal_id, timestamp, from_currency, source_file, to_currency, id) values (?, ?, ?, ?, ?, ?, ?)
18:26:32.906 [http-nio-8080-exec-2] DEBUG o.h.e.j.b.internal.AbstractBatchImpl - Reusing batch statement
18:26:32.906 [http-nio-8080-exec-2] DEBUG org.hibernate.SQL - insert into deal (amount, deal_id, timestamp, from_currency, source_file, to_currency, id) values (?, ?, ?, ?, ?, ?, ?)
18:26:32.906 [http-nio-8080-exec-2] DEBUG o.h.e.j.b.internal.AbstractBatchImpl - Reusing batch statement
18:26:32.906 [http-nio-8080-exec-2] DEBUG org.hibernate.SQL - insert into deal (amount, deal_id, timestamp, from_currency, source_file, to_currency, id) values (?, ?, ?, ?, ?, ?, ?)
18:26:32.906 [http-nio-8080-exec-2] DEBUG o.h.e.j.b.internal.AbstractBatchImpl - Reusing batch statement
18:26:32.906 [http-nio-8080-exec-2] DEBUG org.hibernate.SQL - insert into deal (amount, deal_id, timestamp, from_currency, source_file, to_currency, id) values (?, ?, ?, ?, ?, ?, ?)
18:26:32.906 [http-nio-8080-exec-2]

......

DEBUG o.h.e.j.b.internal.AbstractBatchImpl - Reusing batch statement
18:26:34.002 [http-nio-8080-exec-2] DEBUG org.hibernate.SQL - insert into deal (amount, deal_id, timestamp, from_currency, source_file, to_currency, id) values (?, ?, ?, ?, ?, ?, ?)
18:26:34.002 [http-nio-8080-exec-2] DEBUG o.h.e.j.b.internal.AbstractBatchImpl - Reusing batch statement
18:26:34.002 [http-nio-8080-exec-2] DEBUG org.hibernate.SQL - insert into deal (amount, deal_id, timestamp, from_currency, source_file, to_currency, id) values (?, ?, ?, ?, ?, ?, ?)
18:26:34.002 [http-nio-8080-exec-2] DEBUG o.h.e.j.b.internal.AbstractBatchImpl - Reusing batch statement
18:26:34.002 [http-nio-8080-exec-2] DEBUG org.hibernate.SQL - insert into deal (amount, deal_id, timestamp, from_currency, source_file, to_currency, id) values (?, ?, ?, ?, ?, ?, ?)
18:26:34.002 [http-nio-8080-exec-2] DEBUG o.h.e.j.b.internal.AbstractBatchImpl - Reusing batch statement
18:26:34.002 [http-nio-8080-exec-2] DEBUG org.hibernate.SQL - insert into deal (amount, deal_id, timestamp, from_currency, source_file, to_currency, id) values (?, ?, ?, ?, ?, ?, ?)
18:26:34.002 [http-nio-8080-exec-2] DEBUG o.h.e.j.batch.internal.BatchingBatch - Executing batch size: 27
18:26:34.011 [http-nio-8080-exec-2] DEBUG org.hibernate.SQL - update deal_source_file set invalid_rows=?, source_file=?, valid_rows=? where id=?
18:26:34.015 [http-nio-8080-exec-2] DEBUG o.h.e.j.batch.internal.BatchingBatch - Executing batch size: 1
18:26:34.018 [http-nio-8080-exec-2] DEBUG o.h.e.t.i.jdbc.JdbcTransaction - committed JDBC Connection
18:26:34.018 [http-nio-8080-exec-2] DEBUG o.h.e.t.i.jdbc.JdbcTransaction - re-enabling autocommit
18:26:34.032 [http-nio-8080-exec-2] DEBUG o.s.orm.jpa.JpaTransactionManager - Closing JPA EntityManager [org.hibernate.jpa.internal.EntityManagerImpl@2354fb09] after transaction
18:26:34.032 [http-nio-8080-exec-2] DEBUG o.s.o.jpa.EntityManagerFactoryUtils - Closing JPA EntityManager
18:26:34.032 [http-nio-8080-exec-2] DEBUG o.h.e.j.internal.JdbcCoordinatorImpl - HHH000420: Closing un-released batch
18:26:34.032 [http-nio-8080-exec-2] DEBUG o.h.e.j.i.LogicalConnectionImpl - Releasing JDBC connection
18:26:34.033 [http-nio-8080-exec-2] DEBUG o.h.e.j.i.LogicalConnectionImpl - Released JDBC connection

'

最佳答案

在尝试了所有可能的解决方案后,我终于找到了在 5 秒内插入 100,000 行的解决方案!

我尝试过的事情:

1) 使用 AtomicInteger 将 hibernate/数据库的 AUTOINCREMENT/GENERATED id 替换为自行生成的 ID

2) 使用 batch_size=50 启用 batch_inserts

3) 每 'batch_size' 次persist() 调用后刷新缓存

4) 多线程(这个没试过)

最后起作用的是使用 native 多插入查询并在一个 sql 插入查询中插入 1000 行,而不是在每个实体上使用 persist()。为了插入 100,000 个实体,我创建了一个像这样的原生查询 "INSERT into MyTable VALUES (x,x,x),(x,x,x).......(x,x,x)" [在一个sql插入查询中插入1000行]

现在插入 100,000 条记录大约需要 3 秒!所以瓶颈是orm本身!对于批量插入,唯一可行的方法是原生插入查询!

关于java - 需要在 5 秒内使用 hibernate 在 mysql 中插入 100000 行,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/44243608/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com