gpt4 book ai didi

sql - 如何处理 Kimball 数据集市中迟到的维度和 NULL 业务键?

转载 作者:行者123 更新时间:2023-12-03 00:13:58 27 4
gpt4 key购买 nike

我正在尝试实现 Kimball 数据集市,它使用维度表中的 -1 和 -2 行来表示迟到的维度和 NULL 业务键。我下面有一个示例代码,它为事实和维度数据创建一个临时表,为数据集市创建两个维度表和一个事实表。这是我的示例代码,其中包含 SQL 数据:

--drop table stg_sales
--go
CREATE TABLE dbo.stg_sales
(
stg_sales_id INT IDENTITY(1, 1) NOT NULL,
sales_number INT NOT NULL,
sales_amt INT NULL,
cust_number INT NULL,
cust_firstname NVARCHAR(50) NULL,
cust_lastname NVARCHAR(100) NULL,
cust_address NVARCHAR(500) NULL,
salesperson_number INT NULL,
CONSTRAINT pk_stg_sales PRIMARY KEY (stg_sales_id)
)

go

INSERT stg_sales
(sales_number,
sales_amt,
cust_number,
cust_firstname,
cust_lastname,
cust_address,
salesperson_number)
VALUES (123,
434,
2342,
'Jim',
'Moriaty',
'something',
23)

INSERT stg_sales
(sales_number,
sales_amt,
cust_number,
cust_firstname,
cust_lastname,
cust_address,
salesperson_number)
VALUES (124,
234,
2342,
'Jim',
'Moriaty',
'something',
23)

INSERT stg_sales
(sales_number,
sales_amt,
cust_number,
cust_firstname,
cust_lastname,
cust_address,
salesperson_number)
VALUES (125,
434,
4545,
'Joe',
'Esk',
'someother',
24)

INSERT stg_sales
(sales_number,
sales_amt,
cust_number,
cust_firstname,
cust_lastname,
cust_address,
salesperson_number)
VALUES (126,
434,
5555,
'Daniel',
'Hart',
'Someaddr',
NULL) --salesperson_number business key missing here

INSERT stg_sales
(sales_number,
sales_amt,
cust_number,
cust_firstname,
cust_lastname,
cust_address,
salesperson_number)
VALUES (127,
333,
4444,
'Pat',
'Smith',
'Someaddr',
30)

SELECT *
FROM stg_sales

--create a dimension and fact tables
--drop table dbo.dim_customer
--go
CREATE TABLE dbo.dim_customer
(
customer_wid INT IDENTITY(1, 1) NOT NULL,
cust_number INT NULL,
cust_firstname NVARCHAR(50) NULL,
cust_lastname NVARCHAR(100) NULL,
cust_address NVARCHAR(500) NULL,
date_insert DATETIME2 NOT NULL DEFAULT (Getdate()),
date_update DATETIME2 NULL,
is_current BIT NOT NULL
CONSTRAINT pk_dim_customer PRIMARY KEY (customer_wid)
CONSTRAINT chk_is_current CHECK (is_current IN (0, 1))
)

go

SET IDENTITY_INSERT dbo.dim_customer ON

INSERT dbo.dim_customer
(customer_wid,
cust_number,
cust_firstname,
cust_lastname,
cust_address,
date_insert,
date_update,
is_current)
VALUES (-1,
-1,
'unknown',
'unknown',
'unknown',
Getdate(),
Getdate(),
1)

INSERT dbo.dim_customer
(customer_wid,
cust_number,
cust_firstname,
cust_lastname,
cust_address,
date_insert,
date_update,
is_current)
VALUES (-2,
-2,
'Error',
'Error',
'Error',
Getdate(),
Getdate(),
1)

SET IDENTITY_INSERT dbo.dim_customer OFF

--insert data into dimension table
INSERT dbo.dim_customer
(cust_number,
cust_firstname,
cust_lastname,
cust_address,
is_current)
SELECT DISTINCT cust_number,
cust_firstname,
cust_lastname,
cust_address,
1 AS is_current
FROM dbo.stg_sales
WHERE cust_number <> 4444 --left one record off to simulate the situation where you don't have corrensponding row in dim table (late arriving dimension)
SELECT *
FROM dbo.dim_customer

DROP TABLE dbo.dim_salesperson

--create salesperson table
CREATE TABLE dbo.dim_salesperson
(
salesperson_wid INT IDENTITY(1, 1) NOT NULL,
salesperson_number INT NULL,
salesperson_firstname NVARCHAR(50) NULL,
salesperson_lastname NVARCHAR(100) NULL,
salesperson_address NVARCHAR(500) NULL,
date_insert DATETIME2 NOT NULL DEFAULT (Getdate()),
date_update DATETIME2 NULL,
is_current BIT NOT NULL
CONSTRAINT pk_dim_salesperson PRIMARY KEY (salesperson_wid)
CONSTRAINT chk_dim_salesperson_is_current CHECK (is_current IN (0, 1))
)

go

SET IDENTITY_INSERT dbo.dim_salesperson ON

INSERT dbo.dim_salesperson
(salesperson_wid,
salesperson_number,
salesperson_firstname,
salesperson_lastname,
salesperson_address,
date_insert,
date_update,
is_current)
VALUES (-1,
-1,
'Not available',
'Not available',
'Not available',
Getdate(),
Getdate(),
1)

INSERT dbo.dim_salesperson
(salesperson_wid,
salesperson_number,
salesperson_firstname,
salesperson_lastname,
salesperson_address,
date_insert,
date_update,
is_current)
VALUES (-2,
-2,
'Error',
'Error',
'Error',
Getdate(),
Getdate(),
1)

SET IDENTITY_INSERT dbo.dim_salesperson OFF

--insert data into salesperson
INSERT dbo.dim_salesperson
(salesperson_number,
salesperson_firstname,
salesperson_lastname,
salesperson_address,
is_current)
VALUES (23,
'John',
'Fox',
'something',
1)

INSERT dbo.dim_salesperson
(salesperson_number,
salesperson_firstname,
salesperson_lastname,
salesperson_address,
is_current)
VALUES (24,
'Hadley',
'Fox',
'something',
1)

INSERT dbo.dim_salesperson
(salesperson_number,
salesperson_firstname,
salesperson_lastname,
salesperson_address,
is_current)
VALUES (30,
'Ashley',
'Fox',
'something',
1)

SELECT *
FROM dbo.dim_salesperson

SELECT *
FROM dbo.stg_sales

--create and populate the fact table
--drop table dbo.f_sales
--go
CREATE TABLE dbo.f_sales
(
sales_number INT NOT NULL,
customer_wid INT NOT NULL,
salesperson_wid INT NOT NULL,
sales_amt INT NULL
CONSTRAINT pk_f_sales PRIMARY KEY (sales_number)
CONSTRAINT fk_customer_wid FOREIGN KEY (customer_wid) REFERENCES
dbo.dim_customer(customer_wid),
CONSTRAINT fk_salesperson_wid FOREIGN KEY (salesperson_wid) REFERENCES
dbo.dim_salesperson(salesperson_wid)
)

--populate the fact table
INSERT dbo.f_sales
(sales_number,
customer_wid,
salesperson_wid,
sales_amt)
SELECT stg.sales_number,
Isnull(dimcust.customer_wid, -1) AS customer_wid,
--this is maybe correct way to assign -1 foreign key when there is no corresponding dimension row in the dim table
Isnull(dimsp.salesperson_wid, -2) AS salesperson_wid,
--NOT CORRECT, how to assign -2 foreign key when the business key is NULL in the source?
stg.sales_amt
FROM dbo.stg_sales AS stg
LEFT JOIN dbo.dim_customer AS dimcust
ON stg.cust_number = dimcust.cust_number
LEFT JOIN dbo.dim_salesperson AS dimsp
ON stg.salesperson_number = dimsp.salesperson_number

SELECT *
FROM dbo.f_sales

如何为源系统中缺少业务键的行分配 -2。您可以从 Kimball 那里阅读有关此实现背后的理论的更多信息:

这基本上就是我想要实现的目标:

Handling NULL in dimensions and measures

编辑:

我想我可以在左连接中使用COALESCEISNULL,它似乎会产生正确的结果:

INSERT dbo.f_sales
(sales_number,
customer_wid,
salesperson_wid,
sales_amt)
SELECT stg.sales_number,
Isnull(dimcust.customer_wid, -1) AS customer_wid,
--this is maybe correct way to assign -1 foreign key when there is no corresponding dimension row in the dim table
dimsp.salesperson_wid,
stg.sales_amt
FROM dbo.stg_sales AS stg
LEFT JOIN dbo.dim_customer AS dimcust
ON COALESCE(stg.cust_number, -2) = dimcust.cust_number
LEFT JOIN dbo.dim_salesperson AS dimsp
ON COALESCE(stg.salesperson_number, -2) = dimsp.salesperson_number

最佳答案

纯粹作为一种查找技术

-- add nullable keys to the staging table
alter table dbo.stg_sales ADD
sales_person_wid integer null
, customer_wid integer null
;

-- insert to staging table here (as in your example)

-- lookup sales person key
update dbo.stg_sales
set sales_person_wid = p.sales_person_wid
from dbo.stg_sales as s , dbo.dim_salesperson as p
where s.salesperson_number = p.salesperson_number ;

-- decide what to do with missing business keys
update dbo.stg_sales
set sales_person_wid = -2
where sales_person_wid is null ;


-- do similar for customer

-- now all keys in staging table are not null

-- load to fact table

但是,通常的技术是在提取或清理期间为事务分配特殊(未知、不适用、错误)业务 key 。换句话说,可以在记录进入登台表之前分配特殊的Error业务键。

最后一点,迟到维度意味着业务 key (salesperson_number) 为操作系统所知,但交易(销售事实)在维度数据之前到达仓库。因此,salesperson_number 将为not null,但不会存在于维度表中。您必须将此交易保存在某处,并在记录到达维度后尝试更新 FK (salesperson_wid);大约一天后。

关于sql - 如何处理 Kimball 数据集市中迟到的维度和 NULL 业务键?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/12853932/

27 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com