gpt4 book ai didi

sql - 以 SQL Server 最快的方式插入/更新 5000 万条记录

转载 作者:行者123 更新时间:2023-12-03 01:51:07 25 4
gpt4 key购买 nike

我正在尝试使用从临时表到基表的存储过程将 5000 万插入和更新到表中。但该过程花费的时间比预期要长得多,大约 5-6 小时。

我尝试实现一些索引来进行性能调整,但它们都没有在第一次工作。

有关如何处理和执行大数据插入的任何建议。

我们的 SSIS 程序将来自 CSV 转储的数据转储到临时表中,然后我们的存储过程最后运行,下面的插入会导致时间延迟,

代码片段 - 暂存到基表插入

    INSERT INTO [dbo].[Entries] (
[TransactionID]
,[TimekID]
,[ResourceID]
,[INVOICE_DATE]
,[INVOICE_NUMBER]
,[CLIENT_ID]
,[LAW_FIRM_MATTER_ID]
,[INVOICE_TOTAL]
,[BILLING_START_DATE]
,[BILLING_END_DATE]
,[INVOICE_DESCRIPTION]
,[ITEM_NUMBER]
,[EXP_FEE_INV_ADJ_TYPE]
,[ITEM_NUMBER_OF_UNITS]
,[ITEM_ADJUSTMENT_AMOUNT]
,[ITEM_TOTAL]
,[ITEM_DATE]
,[ITEM_TimeK_CODE]
,[ITEM_EXPENSE_CODE]
,[ITEM_ACTIVITY_CODE]
,[TIMEKEEPER_ID]
,[ITEM_DESCRIPTION]
,[LAW_FIRM_ID]
,[ITEM_UNIT_COST]
,[TIMEKEEPER_NAME]
,[TIMEKEEPER_CLASSIFICATION]
,[CLIENT_MATTER_ID]
,[TIMEKEEPER_EMAIL]
,[CreatedBy]
,[CreatedOn]
,[ModifiedBy]
,[ModifiedOn]
,[DeletedFlag]
,[sid]
,[ITEM_BASE_NUMBER_OF_UNITS]
,[ITEM_BASE_TOTAL]
,[UNIQUE_ID]
,[IsCancelled]
,[ENTRY_CLOSED_DATE]
)
SELECT ts.TransactionID
,(
SELECT t1.ID
FROM [dbo].[time] t1
INNER JOIN [dbo].[TimekMaster] tm ON t1.ABATimekCode = tm.ID
AND (
tm.Code = ts.ITEM_TimeK_CODE
OR tm.NAME = ts.ITEM_TimeK_CODE
)
WHERE t1.sid = @SID
AND t1.DeletedFlag = 0
AND m.id = t1.matterid
) ID
,(
SELECT r.[ID]
FROM [dbo].[Resource] r
WHERE r.[FirmUserID] = ts.[TIMEKEEPER_ID]
AND r.[sid] = @SID
AND r.[DeletedFlag] = 0
) ResourceID
,ts.INVOICE_DATE
,ts.INVOICE_NUMBER
,ts.CLIENT_ID
,ts.LAW_FIRM_MATTER_ID
,REPLACE(ts.INVOICE_TOTAL, ',', '')
,ts.BILLING_START_DATE
,ts.BILLING_END_DATE
,ts.INVOICE_DESCRIPTION
,ts.ITEM_NUMBER
,ts.EXP_FEE_INV_ADJ_TYPE
,REPLACE(ts.ITEM_NUMBER_OF_UNITS, ',', '')
,REPLACE(ts.ITEM_ADJUSTMENT_AMOUNT, ',', '')
,REPLACE(ts.ITEM_TOTAL, ',', '')
,ts.[ITEM_DATE] --ITEM_DATE, varchar(1000),>
,ts.ITEM_TimeK_CODE
,ts.ITEM_EXPENSE_CODE
,ts.ITEM_ACTIVITY_CODE
,ts.TIMEKEEPER_ID
,CAST(ts.ITEM_DESCRIPTION AS VARCHAR(4000))
,ts.LAW_FIRM_ID
,REPLACE(ts.ITEM_UNIT_COST, ',', '')
,ts.TIMEKEEPER_NAME
,ts.TIMEKEEPER_CLASSIFICATION
,ts.CLIENT_MATTER_ID
,ts.TIMEKEEPER_EMAIL
,0 --<CreatedBy, int,>
,GETDATE() --<CreatedOn, datetime2(7),>
,NULL --<ModifiedBy, int,>
,NULL --<ModifiedOn, datetime2(7),>
,0 --<DeletedFlag, bit,>
,@SID --<sid, int,>
,REPLACE(ts.[ITEM_NUMBER_OF_UNITS_BASE], ',', '')
,REPLACE(ts.[ITEM_BASE_TOTAL], ',', '')
,ts.[UNIQUE_ITEM_NUMBER] --<UNIQUE_DOCKET_ID, varchar(1000),>
,0 --ts.[ITEM_CANCEL] --<IsCancelled, bit,>
,ts.[ENTRY_CLOSED_DATE] --<ENTRY_CLOSED_DATE, varchar(100),>)
FROM E_Staging ts
INNER JOIN matter m ON m.firmmatternumber = ts.LAW_FIRM_MATTER_ID
AND m.[sid] = @SID
AND m.[DeletedFlag] = 0
WHERE 1 = (
SELECT COUNT(1)
FROM [dbo].[Time] t1
INNER JOIN [dbo].[TimekMaster] tm ON t1.ABATimekCode = tm.ID
AND (
tm.Code = ts.ITEM_TimeK_CODE
OR tm.NAME = ts.ITEM_TimeK_CODE
)
WHERE t1.sid = @SID
AND t1.DeletedFlag = 0
AND m.id = t1.matterid
)
AND ts.[Sid] = @SID
AND ts.[TransactionID] = @TxnID
AND NOT EXISTS (
SELECT 1
FROM [dbo].[Entries] te
WHERE te.[sid] = @SID
AND te.[DeletedFlag] = 0
AND ts.[UNIQUE_ITEM_NUMBER] = te.[UNIQUE_ID]
)

基础表

CREATE TABLE [dbo].[Entries] (
[ID] INT IDENTITY(1, 1) NOT NULL
,[TransactionID] INT NOT NULL
,[TaskID] INT NULL
,[ResourceID] INT NULL
,[INVOICE_DATE] VARCHAR(1000) NULL
,[INVOICE_NUMBER] VARCHAR(1000) NULL
,[CLIENT_ID] VARCHAR(1000) NULL
,[LAW_FIRM_MATTER_ID] VARCHAR(1000) NULL
,[INVOICE_TOTAL] NUMERIC(14, 2) NULL
,[BILLING_START_DATE] VARCHAR(1000) NULL
,[BILLING_END_DATE] VARCHAR(1000) NULL
,[INVOICE_DESCRIPTION] VARCHAR(1000) NULL
,[LINE_ITEM_NUMBER] VARCHAR(1000) NULL
,[EXP_FEE_INV_ADJ_TYPE] VARCHAR(1000) NULL
,[LINE_ITEM_NUMBER_OF_UNITS] NUMERIC(14, 2) NULL
,[LINE_ITEM_ADJUSTMENT_AMOUNT] NUMERIC(14, 2) NULL
,[LINE_ITEM_TOTAL] NUMERIC(14, 2) NULL
,[LINE_ITEM_DATE] VARCHAR(1000) NULL
,[LINE_ITEM_TASK_CODE] VARCHAR(1000) NULL
,[LINE_ITEM_EXPENSE_CODE] VARCHAR(1000) NULL
,[LINE_ITEM_ACTIVITY_CODE] VARCHAR(1000) NULL
,[TIMEKEEPER_ID] VARCHAR(1000) NULL
,[LINE_ITEM_DESCRIPTION] VARCHAR(4000) NULL
,[LAW_FIRM_ID] VARCHAR(1000) NULL
,[LINE_ITEM_UNIT_COST] NUMERIC(14, 2) NULL
,[TIMEKEEPER_NAME] VARCHAR(1000) NULL
,[TIMEKEEPER_CLASSIFICATION] VARCHAR(1000) NULL
,[CLIENT_MATTER_ID] VARCHAR(1000) NULL
,[TIMEKEEPER_EMAIL] VARCHAR(1000) NULL
,[CreatedBy] INT NULL
,[CreatedOn] DATETIME2(7) DEFAULT(sysutcdatetime()) NULL
,[ModifiedBy] INT NULL
,[ModifiedOn] DATETIME2(7) NULL
,[DeletedFlag] BIT DEFAULT((0)) NULL
,[sid] INT NOT NULL
,[LINE_ITEM_BASE_NUMBER_OF_UNITS] NUMERIC(14, 2) NULL
,[LINE_ITEM_BASE_TOTAL] NUMERIC(14, 2) NULL
,[UNIQUE_ID] VARCHAR(1000) NULL
,[IsCancelled] BIT DEFAULT((0)) NOT NULL
,[ENTRY_CLOSED_DATE] VARCHAR(100) NULL
,[MappingStatus] TINYINT DEFAULT((0)) NOT NULL
,[LINE_ITEM_PHASE_CODE] VARCHAR(1000) NULL
,[Override] BIT DEFAULT((0)) NOT NULL
,[e_INVOICE_DESCRIPTION] VARCHAR(1000) NULL
,[e_CLIENT_ID] VARBINARY(8000) NULL
,[e_LINE_ITEM_DESCRIPTION] VARCHAR(8000) NULL
,[e_CLIENT_MATTER_ID] VARCHAR(1000) NULL
,[AmountDocketed] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
,[AmountBilled] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
,[AmountWrittenDown] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
,[AmountWrittenUp] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
,[AmountWIP] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
,[HourDocketed] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
,[HourBilled] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
,[HourWrittenDown] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
,[HourWrittenUp] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
,[HourWIP] NUMERIC(14, 2) DEFAULT((0)) NOT NULL
,[MappedBy] INT NULL DEFAULT((0))
,[MappedOn] DATETIME2 NULL DEFAULT(sysutcdatetime())
,[RemapBy] INT NULL
,CONSTRAINT [PK_utb_entries] PRIMARY KEY CLUSTERED ([ID] ASC)
,FOREIGN KEY ([TaskID]) REFERENCES [dbo].[Task]([ID])
,FOREIGN KEY ([TaskID]) REFERENCES [dbo].[Task]([ID])
);
GO

CREATE NONCLUSTERED INDEX [Entries_TaskID] ON [dbo].[Entries] ([TaskID] ASC);
GO

CREATE NONCLUSTERED INDEX [Entries_LAW_FIRM_MATTER_ID] ON [dbo].[Entries] ([LAW_FIRM_MATTER_ID] ASC);
GO

CREATE NONCLUSTERED INDEX [Entries_UNIQUE_DOCKET_ID] ON [dbo].[Entries] ([UNIQUE_DOCKET_ID] ASC);
GO

CREATE NONCLUSTERED INDEX [Entries_ResourceID] ON [dbo].[Entries] ([ResourceID] ASC);
GO

CREATE NONCLUSTERED INDEX [Entries_INVOICE_NUMBER] ON [dbo].[Entries] ([INVOICE_NUMBER] ASC);
GO

CREATE NONCLUSTERED INDEX [Entries_EXP_FEE_INV_ADJ_TYPE] ON [dbo].[Entries] ([EXP_FEE_INV_ADJ_TYPE] ASC);
GO

CREATE NONCLUSTERED INDEX [Entries_IsCancelled] ON [dbo].[Entries] ([IsCancelled] ASC);
GO

CREATE NONCLUSTERED INDEX [IX_Entries_Grouped] ON [dbo].[Entries] (
[TaskID] ASC
,[sid] ASC
,[DeletedFlag] ASC
,[ID] ASC
) INCLUDE (
[INVOICE_NUMBER]
,[LAW_FIRM_MATTER_ID]
,[LINE_ITEM_NUMBER_OF_UNITS]
,[LINE_ITEM_TOTAL]
,[LINE_ITEM_BASE_NUMBER_OF_UNITS]
,[LINE_ITEM_BASE_TOTAL]
,[IsCancelled]
)
WITH (
SORT_IN_TEMPDB = OFF
,DROP_EXISTING = OFF
,ONLINE = OFF
) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [IX_Entries_Grouped1] ON [dbo].[Entries] (
[sid] ASC
,[DeletedFlag] ASC
,[IsCancelled] ASC
) INCLUDE (
[ID]
,[LAW_FIRM_MATTER_ID]
,[EXP_FEE_INV_ADJ_TYPE]
,[LINE_ITEM_BASE_NUMBER_OF_UNITS]
,[LINE_ITEM_BASE_TOTAL]
)
WITH (
SORT_IN_TEMPDB = OFF
,DROP_EXISTING = OFF
,ONLINE = OFF
) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [IX_Entries_Grouped2] ON [dbo].[Entries] (
[sid] ASC
,[DeletedFlag] ASC
,[ID] ASC
,[LINE_ITEM_BASE_TOTAL] ASC
,[TaskID] ASC
) INCLUDE ([LAW_FIRM_MATTER_ID])
WITH (
SORT_IN_TEMPDB = OFF
,DROP_EXISTING = OFF
,ONLINE = OFF
) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [IX_Entries_Grouped3] ON [dbo].[Entries] (
[TaskID] ASC
,[sid] ASC
,[DeletedFlag] ASC
,[ID] ASC
,[ResourceID] ASC
) INCLUDE (
[INVOICE_NUMBER]
,[EXP_FEE_INV_ADJ_TYPE]
,[LINE_ITEM_NUMBER_OF_UNITS]
,[LINE_ITEM_TOTAL]
,[LINE_ITEM_DATE]
,[LINE_ITEM_BASE_NUMBER_OF_UNITS]
,[LINE_ITEM_BASE_TOTAL]
,[IsCancelled]
,[ENTRY_CLOSED_DATE]
)
WITH (
SORT_IN_TEMPDB = OFF
,DROP_EXISTING = OFF
,ONLINE = OFF
) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [IX_Entries_Grouped4] ON [dbo].[Entries] (
[TaskID] ASC
,[sid] ASC
,[IsCancelled] ASC
,[DeletedFlag] ASC
,[ID] ASC
,[ResourceID] ASC
) INCLUDE (
[INVOICE_NUMBER]
,[EXP_FEE_INV_ADJ_TYPE]
,[LINE_ITEM_NUMBER_OF_UNITS]
,[LINE_ITEM_TOTAL]
,[LINE_ITEM_DATE]
,[LINE_ITEM_BASE_NUMBER_OF_UNITS]
,[LINE_ITEM_BASE_TOTAL]
,[ENTRY_CLOSED_DATE]
)
WITH (
SORT_IN_TEMPDB = OFF
,DROP_EXISTING = OFF
,ONLINE = OFF
) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [Entries_LINE_ITEM_DATE] ON [dbo].[Entries] ([LINE_ITEM_DATE])
GO

CREATE STATISTICS [Stat_Entries_1] ON [dbo].[Entries] (
[IsCancelled]
,[ID]
,[TaskID]
,[sid]
)
GO

暂存表

CREATE TABLE [dbo].[E_Staging] (
[ID] INT IDENTITY(1, 1) NOT NULL
,[INVOICE_DAE] VARCHAR(1000) NULL
,[INVOICE_NUMBER] VARCHAR(1000) NULL
,[CLIENT_ID] VARCHAR(1000) NULL
,[LAW_FIRM_MATER_ID] VARCHAR(1000) NULL
,[INVOICE_TOTAL] VARCHAR(1000) NULL
,[BILLING_START_DAE] VARCHAR(1000) NULL
,[BILLING_END_DAE] VARCHAR(1000) NULL
,[INVOICE_DESCRIPTION] VARCHAR(8000) NULL
,[IEM_NUMBER] VARCHAR(1000) NULL
,[EXP_FEE_INV_ADJ_TYPE] VARCHAR(1000) NULL
,[IEM_NUMBER_OF_UNITS] VARCHAR(1000) NULL
,[IEM_NUMBER_OF_UNITS_BASE] VARCHAR(1000) NULL
,[IEM_ADJUSTMENT_AMOUNT] VARCHAR(1000) NULL
,[IEM_TOTAL] VARCHAR(1000) NULL
,[IEM_BASE_TOTAL] VARCHAR(1000) NULL
,[IEM_DAE] VARCHAR(100) NULL
,[IEM_TASK_CODE] VARCHAR(1000) NULL
,[IEM_EXPENSE_CODE] VARCHAR(1000) NULL
,[IEM_ACTIVITY_CODE] VARCHAR(1000) NULL
,[TIMEKEEPER_ID] VARCHAR(1000) NULL
,[IEM_DESCRIPTION] VARCHAR(8000) NULL
,[LAW_FIRM_ID] VARCHAR(1000) NULL
,[IEM_UNIT_COST] VARCHAR(1000) NULL
,[TIMEKEEPER_NAME] VARCHAR(1000) NULL
,[TIMEKEEPER_CLASSIFICATION] VARCHAR(1000) NULL
,[CLIENT_MATER_ID] VARCHAR(1000) NULL
,[TIMEKEEPER_EMAIL] VARCHAR(1000) NULL
,[UNIQUE_ITEM_NUMBER] VARCHAR(1000) NULL
,[IEM_CANCEL] VARCHAR(1000) NULL
,[Sid] INT NULL
,[TransactionID] INT NULL
,[ValidationStatus] VARCHAR(20) NULL
,[ValidationMessage] VARCHAR(200) NULL
,[UploadStatus] VARCHAR(20) NULL
,[StatusMessage] VARCHAR(50) NULL
,[CREATEdOn] DAETIME NULL DEFAULT(sysutcdaEtime())
,[FirmLastModifiedon] VARCHAR(1000) NULL
,[ENTRY_CLOSED_DAE] VARCHAR(1000) NULL
,[UNIQUE_Docket_ID2] VARCHAR(1000) NULL
,[DockeEd_Released_Billed] VARCHAR(1000) NULL
,[IEM_PHASE_CODE] VARCHAR(1000) NULL
,[WriE_Down] VARCHAR(200) NULL
,[e_ClientId] VARBINARY(MAX) NULL
,[e_INVOICE_DESCRIPTION] VARCHAR(1000) NULL
,[e_IEM_DESCRIPTION] VARCHAR(8000) NULL
,[e_CLIENT_MATER_ID] VARCHAR(1000) NULL
,CONSTRAINT [PK_E_Staging] PRIMARY KEY CLUSERED([ID] ASC)
);
GO

CREATE NONCLUSERED INDEX [nci_wi_E_Staging_2150E36DEE003CE53D44FC548A4CD6CC] ON [dbo].[E_Staging] (
[Sid] ASC
,[TransactionID] ASC
)
GO

CREATE NONCLUSERED INDEX IX_E_Staging_CREATEDON ON [dbo].[E_Staging] (CREATEDON)
GO

CREATE NONCLUSERED INDEX IX_E_Staging_LAW_FIRM_MATER_ID ON [dbo].[E_Staging] (LAW_FIRM_MATER_ID)
GO

CREATE NONCLUSERED INDEX IX_E_Staging_UNIQUE_IEM_NUMBER ON [dbo].[E_Staging] (UNIQUE_IEM_NUMBER)
GO

最佳答案

我建议您不要在目标表上创建新索引,理想情况下您应该在插入之前禁用索引。

  1. 禁用目标表上的索引和约束。
  2. 插入记录。
  3. 启用索引和约束。

这将为您提供更好的插入性能。

除此之外,您还需要优化您的选择查询。如果您也共享选择查询,您可以获得更多帮助。

关于sql - 以 SQL Server 最快的方式插入/更新 5000 万条记录,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/54435330/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com