gpt4 book ai didi

google-bigquery - 查询执行期间资源超出。大查询

转载 作者:行者123 更新时间:2023-12-04 15:31:13 26 4
gpt4 key购买 nike

全部,

我一直在尝试通过 BigQuery 运行一些更大的分析代码,但我继续遇到许多状态和存在的数据量的问题。我们谈论的是多年的数据。可能是我的查询不符合标准,但是我需要尝试根据特定组来获取总和。

我需要在查询中更改什么以防止资源超出错误?

SELECT
COMPANY_NAME,
RATING_CLASS,
COMPANY_KEY,
-- State Info & Calculations
-- Over is used as a WINDOW function to SUM ALL results within the given query
SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER () AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
-- ZIP3 Info & Calculations
ZIP3,
ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
ZIP3_TOTAL_RESULT_APPEARANCE,
ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5 Info & Calculations
ZIP5,
ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
ZIP5_TOTAL_RESULT_APPEARANCE,
ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM (
SELECT
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
-- ZIP3
ZIP3,
COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP3) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
COUNT(*) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOTAL_RESULT_APPEARANCE,
SUM(CASE
WHEN lowest = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_LOWEST_COUNT,
AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_RANK,
SUM(CASE
WHEN top5 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP5_COUNT,
SUM(CASE
WHEN top10 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_TOP10_COUNT,
AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_AVG_CENT_DIFF,
SUM(CASE
WHEN DISCOUNTED_lowest = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_RANK,
SUM(CASE
WHEN DISCOUNTED_top5 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP5_COUNT,
SUM(CASE
WHEN DISCOUNTED_top10 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_TOP10_COUNT,
AVG(discounted_cent_diff) OVER (PARTITION BY ZIP3, company_key, rating_class) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5
ZIP5,
COUNT(DISTINCT logging_key) OVER (PARTITION BY ZIP5) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
COUNT(*) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOTAL_RESULT_APPEARANCE,
SUM(CASE
WHEN lowest = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_LOWEST_COUNT,
AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_RANK,
SUM(CASE
WHEN top5 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP5_COUNT,
SUM(CASE
WHEN top10 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_TOP10_COUNT,
AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_AVG_CENT_DIFF,
SUM(CASE
WHEN DISCOUNTED_lowest = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
AVG(discounted_ratio_to_min) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_RANK,
SUM(CASE
WHEN DISCOUNTED_top5 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP5_COUNT,
SUM(CASE
WHEN DISCOUNTED_top10 = TRUE THEN 1
ELSE 0 END) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_TOP10_COUNT,
AVG(discounted_cent_diff) OVER (PARTITION BY ZIP5, company_key, rating_class) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
ZIP3,
ZIP5,
LOWEST,
RATIO_TO_MIN,
RATE_ORDER,
TOP5,
TOP10,
CENT_DIFF,
DISCOUNTED_LOWEST,
DISCOUNTED_RATIO_TO_MIN,
DISCOUNTED_RATE_ORDER,
DISCOUNTED_TOP5,
DISCOUNTED_TOP10,
DISCOUNTED_CENT_DIFF,
LOGGING_KEY )
GROUP BY
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
-- ZIP3 General
ZIP3,
ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
ZIP3_TOTAL_RESULT_APPEARANCE,
ZIP3_LOWEST,
ZIP3_AVG_RATIO_TO_LOWEST,
ZIP3_AVG_RANK,
ZIP3_TOP5,
ZIP3_TOP10,
ZIP3_AVG_CENT_DIFF,
ZIP3_DISCOUNTED_LOWEST,
ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP3_DISCOUNTED_AVG_RANK,
ZIP3_DISCOUNTED_TOP5,
ZIP3_DISCOUNTED_TOP10,
ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5 General
ZIP5,
ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
ZIP5_TOTAL_RESULT_APPEARANCE,
ZIP5_LOWEST,
ZIP5_AVG_RATIO_TO_LOWEST,
ZIP5_AVG_RANK,
ZIP5_TOP5,
ZIP5_TOP10,
ZIP5_AVG_CENT_DIFF,
ZIP5_DISCOUNTED_LOWEST,
ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP5_DISCOUNTED_AVG_RANK,
ZIP5_DISCOUNTED_TOP5,
ZIP5_DISCOUNTED_TOP10,
ZIP5_DISCOUNTED_AVG_CENT_DIFF

更新了带有建议修复的查询:
SELECT
main.COMPANY_NAME AS COMPANY_NAME,
main.COMPANY_KEY AS COMPANY_KEY,
main.RATING_CLASS AS RATING_CLASS,
state_count.STATE_MED_SUPP_TOOL_NUM_QUOTE AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
-- ZIP3
main.ZIP3 AS ZIP3,
ZIP3_COUNT.ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE,
ZIP3_SUB.ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
ZIP3_SUB.ZIP3_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
ZIP3_SUB.ZIP3_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
ZIP3_SUB.ZIP3_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
ZIP3_SUB.ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
ZIP3_SUB.ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP3_SUB.ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
ZIP3_SUB.ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
ZIP3_SUB.ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_SUB.ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
ZIP3_SUB.ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
ZIP3_SUB.ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5
main.ZIP5 AS ZIP5,
ZIP5_COUNT.ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE,
ZIP5_SUB.ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
ZIP5_SUB.ZIP5_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
ZIP5_SUB.ZIP5_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
ZIP5_SUB.ZIP5_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
ZIP5_SUB.ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
ZIP5_SUB.ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP5_SUB.ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
ZIP5_SUB.ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
ZIP5_SUB.ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_SUB.ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
ZIP5_SUB.ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
ZIP5_SUB.ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM (
SELECT
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
ZIP3,
ZIP5,
STATE,
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824",
"4872666167115776",
"6396348765044736",
"6139303562313728",
"4988973881491456")
AND portal_key NOT IN ("5878607637381120")
GROUP BY
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
ZIP3,
ZIP5,
STATE,
) AS MAIN
LEFT JOIN (
SELECT
ZIP3,
COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE,
COMPANY_KEY,
RATING_CLASS,
AVG(discounted_ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST,
SUM(CASE
WHEN TOP5 = TRUE THEN 1
ELSE 0 END) AS ZIP3_TOP5_COUNT,
SUM(CASE
WHEN LOWEST = TRUE THEN 1
ELSE 0 END) AS ZIP3_LOWEST_COUNT,
SUM(CASE
WHEN TOP10 = TRUE THEN 1
ELSE 0 END) AS ZIP3_TOP10_COUNT,
AVG(discounted_rate_order) AS ZIP3_AVG_RANK,
AVG(discounted_cent_diff) AS ZIP3_AVG_CENT_DIFF,
AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
SUM(CASE
WHEN DISCOUNTED_TOP5 = TRUE THEN 1
ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT,
SUM(CASE
WHEN DISCOUNTED_LOWEST = TRUE THEN 1
ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
SUM(CASE
WHEN DISCOUNTED_TOP10 = TRUE THEN 1
ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT,
AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK,
AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
ZIP3,
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
) AS ZIP3_SUB
ON
main.ZIP3 = ZIP3_SUB.ZIP3
AND main.COMPANY_KEY = ZIP3_SUB.COMPANY_KEY
AND main.RATING_CLASS = ZIP3_SUB.RATING_CLASS
LEFT JOIN (
SELECT
ZIP3,
EXACT_COUNT_DISTINCT(logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
ZIP3 ) AS ZIP3_COUNT
ON
main.ZIP3 = ZIP3_COUNT.ZIP3
LEFT JOIN (
SELECT
ZIP5,
COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE,
COMPANY_KEY,
RATING_CLASS,
AVG(discounted_ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST,
SUM(CASE
WHEN TOP5 = TRUE THEN 1
ELSE 0 END) AS ZIP5_TOP5_COUNT,
SUM(CASE
WHEN LOWEST = TRUE THEN 1
ELSE 0 END) AS ZIP5_LOWEST_COUNT,
SUM(CASE
WHEN TOP10 = TRUE THEN 1
ELSE 0 END) AS ZIP5_TOP10_COUNT,
AVG(discounted_rate_order) AS ZIP5_AVG_RANK,
AVG(discounted_cent_diff) AS ZIP5_AVG_CENT_DIFF,
AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
SUM(CASE
WHEN DISCOUNTED_TOP5 = TRUE THEN 1
ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT,
SUM(CASE
WHEN DISCOUNTED_LOWEST = TRUE THEN 1
ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
SUM(CASE
WHEN DISCOUNTED_TOP10 = TRUE THEN 1
ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT,
AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK,
AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
ZIP5,
COMPANY_NAME,
COMPANY_KEY,
RATING_CLASS,
) AS ZIP5_SUB
ON
main.ZIP5 = ZIP5_SUB.ZIP5
AND main.COMPANY_KEY = ZIP5_SUB.COMPANY_KEY
AND main.RATING_CLASS = ZIP5_SUB.RATING_CLASS
LEFT JOIN (
SELECT
ZIP5,
EXACT_COUNT_DISTINCT(logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
ZIP5 ) AS ZIP5_COUNT
ON
main.ZIP5 = ZIP5_COUNT.ZIP5
LEFT JOIN (
SELECT
STATE,
EXACT_COUNT_DISTINCT(logging_key) AS STATE_MED_SUPP_TOOL_NUM_QUOTE
FROM
[csgapi:qh_med_supp_tool.v2_TX]
WHERE
SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T06:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T05:00:00.000Z')
AND user_key NOT IN ("6522869941010432",
"6277136540237824")
GROUP BY
STATE ) AS STATE_COUNT
ON
main.STATE = STATE_COUNT.STATE

解释:
由于 BigQuery 是共享资源,BigQuery 会估算查询所需的资源量并相应地分配资源。通过将查询拆分为同一个表上的多个连接,可以使用比原先预期更多的资源来执行计算。有关为何限制查询效果良好的技术性解释,请参阅 Jordan Tigani 上的堆栈溢出响应 here

最佳答案

不知何故,我觉得下面是你需要的。我可能是错的,因为它实际上只是相对盲目地尝试对您的逻辑进行逆向工程,所以不要在这里判断我是否错了
无法测试,但我觉得实际成功的成本会很低

SELECT
main.COMPANY_NAME AS COMPANY_NAME,
main.COMPANY_KEY AS COMPANY_KEY,
main.RATING_CLASS AS RATING_CLASS,
SUM(ZIP5_MED_SUPP_TOOL_NUM_QUOTE) OVER () AS STATE_MED_SUPP_TOOL_NUM_QUOTE,
-- ZIP3
main.ZIP3 AS ZIP3,
ZIP3_MED_SUPP_TOOL_NUM_QUOTE AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE,
ZIP3_TOTAL_RESULT_APPEARANCE,
ZIP3_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_LOWEST,
ZIP3_AVG_RATIO_TO_LOWEST AS ZIP3_AVG_RATIO_TO_LOWEST,
ZIP3_AVG_RANK AS ZIP3_AVG_RANK,
ZIP3_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP5,
ZIP3_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_TOP10,
ZIP3_AVG_CENT_DIFF AS ZIP3_AVG_CENT_DIFF,
ZIP3_DISCOUNTED_LOWEST_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_LOWEST,
ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP3_DISCOUNTED_AVG_RANK AS ZIP3_DISCOUNTED_AVG_RANK,
ZIP3_DISCOUNTED_TOP5_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP5,
ZIP3_DISCOUNTED_TOP10_COUNT/ZIP3_TOTAL_RESULT_APPEARANCE AS ZIP3_DISCOUNTED_TOP10,
ZIP3_DISCOUNTED_AVG_CENT_DIFF AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
-- ZIP5
main.ZIP5 AS ZIP5,
ZIP5_MED_SUPP_TOOL_NUM_QUOTE AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE,
ZIP5_TOTAL_RESULT_APPEARANCE,
ZIP5_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_LOWEST,
ZIP5_AVG_RATIO_TO_LOWEST AS ZIP5_AVG_RATIO_TO_LOWEST,
ZIP5_AVG_RANK AS ZIP5_AVG_RANK,
ZIP5_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP5,
ZIP5_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_TOP10,
ZIP5_AVG_CENT_DIFF AS ZIP5_AVG_CENT_DIFF,
ZIP5_DISCOUNTED_LOWEST_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_LOWEST,
ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
ZIP5_DISCOUNTED_AVG_RANK AS ZIP5_DISCOUNTED_AVG_RANK,
ZIP5_DISCOUNTED_TOP5_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP5,
ZIP5_DISCOUNTED_TOP10_COUNT/ZIP5_TOTAL_RESULT_APPEARANCE AS ZIP5_DISCOUNTED_TOP10,
ZIP5_DISCOUNTED_AVG_CENT_DIFF AS ZIP5_DISCOUNTED_AVG_CENT_DIFF
FROM (
SELECT COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY COMPANY_NAME, COMPANY_KEY, RATING_CLASS, ZIP3, ZIP5
) AS main
LEFT JOIN (
SELECT
ZIP3, company_key, rating_class,
COUNT(*) AS ZIP3_TOTAL_RESULT_APPEARANCE,
SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_LOWEST_COUNT,
AVG(discounted_ratio_to_min) AS ZIP3_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) AS ZIP3_AVG_RANK,
SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP5_COUNT,
SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_TOP10_COUNT,
AVG(discounted_cent_diff) AS ZIP3_AVG_CENT_DIFF,
SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_LOWEST_COUNT,
AVG(discounted_ratio_to_min) AS ZIP3_DISCOUNTED_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) AS ZIP3_DISCOUNTED_AVG_RANK,
SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP5_COUNT,
SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP3_DISCOUNTED_TOP10_COUNT,
AVG(discounted_cent_diff) AS ZIP3_DISCOUNTED_AVG_CENT_DIFF,
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY ZIP3, company_key, rating_class
) AS zip3_sub
ON main.ZIP3 = zip3_sub.ZIP3 AND main.company_key = zip3_sub.company_key AND main.rating_class = zip3_sub.rating_class
LEFT JOIN (
SELECT
ZIP5, company_key, rating_class,
COUNT(*) AS ZIP5_TOTAL_RESULT_APPEARANCE,
SUM(CASE WHEN lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_LOWEST_COUNT,
AVG(discounted_ratio_to_min) AS ZIP5_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) AS ZIP5_AVG_RANK,
SUM(CASE WHEN top5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP5_COUNT,
SUM(CASE WHEN top10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_TOP10_COUNT,
AVG(discounted_cent_diff) AS ZIP5_AVG_CENT_DIFF,
SUM(CASE WHEN DISCOUNTED_lowest = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_LOWEST_COUNT,
AVG(discounted_ratio_to_min) AS ZIP5_DISCOUNTED_AVG_RATIO_TO_LOWEST,
AVG(discounted_rate_order) AS ZIP5_DISCOUNTED_AVG_RANK,
SUM(CASE WHEN DISCOUNTED_top5 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP5_COUNT,
SUM(CASE WHEN DISCOUNTED_top10 = TRUE THEN 1 ELSE 0 END) AS ZIP5_DISCOUNTED_TOP10_COUNT,
AVG(discounted_cent_diff) AS ZIP5_DISCOUNTED_AVG_CENT_DIFF,
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY ZIP5, company_key, rating_class
) AS zip5_sub
ON main.ZIP5 = zip5_sub.ZIP5 AND main.company_key = zip5_sub.company_key AND main.rating_class = zip5_sub.rating_class
LEFT JOIN (
SELECT ZIP3, COUNT(DISTINCT logging_key) AS ZIP3_MED_SUPP_TOOL_NUM_QUOTE
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY ZIP3
) AS zip3_count
ON main.ZIP3 = zip3_count.ZIP3
LEFT JOIN (
SELECT ZIP5, COUNT(DISTINCT logging_key) AS ZIP5_MED_SUPP_TOOL_NUM_QUOTE
FROM [csgapi:qh_med_supp_tool.v2_TX]
WHERE SEARCH_TIMESTAMP >= TIMESTAMP('2016-01-01T00:00:00.000Z')
AND SEARCH_TIMESTAMP <= TIMESTAMP('2017-01-01T00:00:00.000Z')
AND user_key NOT IN ("6522869941010432", "6277136540237824")
GROUP BY ZIP5
) AS zip5_count
ON main.ZIP5 = zip5_count.ZIP5

另外,请注意:在 BigQuery Legacy SQL 中 - COUNT(DISTINCT)函数是概率性的 - 给出统计近似值,不保证准确。
您可以使用 EXACT_COUNT_DISTINCT()相反的功能 - 这个给你确切的数字,但在后端贵一点

当然,整个查询可以重写为 BigQuery Standard SQL当 COUNT(DISTINCT) 产生精确计数并且根据 BigQuery 团队的建议最好使用标准 SQL 时

关于google-bigquery - 查询执行期间资源超出。大查询,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/41750801/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com