gpt4 book ai didi

Mysql group by order by 性能低下

转载 作者:行者123 更新时间:2023-11-30 21:32:40 37 4
gpt4 key购买 nike

我正在使用 Mysql 5.7,我有一个包含 7006500 行的表。我的查询执行分组依据并获取行,该行具有最大计数,每个组在已经索引但仍需要时间执行的列上。下面是我的查询、执行计划和表架构。

表架构

        CREATE TABLE templog (
id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
userid bigint(12) unsigned NOT NULL,
type tinyint(3) NOT NULL DEFAULT '0',
os tinyint(4) NOT NULL DEFAULT '0',
day date DEFAULT NULL,
activetime smallint(5) unsigned NOT NULL DEFAULT '0',
createdat datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
timegroupid tinyint(4) NOT NULL DEFAULT '0',
PRIMARY KEY (`id`),
KEY templog_type_IDX (`type`,`day`,`userid`,`timegroupid`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=7006500 DEFAULT CHARSET=utf8;

我的查询:-

SELECT  SQL_NO_CACHE y.userid, y.timegroupid as besttime,y.cnt
FROM (
SELECT @row_number := CASE WHEN @userid=x.userid THEN @row_number+1 ELSE 1 END AS row_number ,
@userid := x.userid AS userid ,x.cnt,x.timegroupid
FROM (
SELECT userid, timegroupid ,COUNT(userid) as cnt
from templog
where type = 3
AND day BETWEEN '2020-01-01' AND '2020-01-20'
AND userid < 771267
GROUP by userid, timegroupid
ORDER by userid DESC ,cnt DESC
) x,
( SELECT @row_number:=0, @userid:='') AS t
) y
where y.row_number = 1
ORDER by y.userid DESC
LIMIT 1000;

查询说明格式:

                {
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "12.00"
},
"ordering_operation": {
"using_filesort": true,
"table": {
"table_name": "y",
"access_type": "ref",
"possible_keys": [
"<auto_key0>"
],
"key": "<auto_key0>",
"used_key_parts": [
"row_number"
],
"key_length": "9",
"ref": [
"const"
],
"rows_examined_per_scan": 10,
"rows_produced_per_join": 10,
"filtered": "100.00",
"cost_info": {
"read_cost": "10.00",
"eval_cost": "2.00",
"prefix_cost": "12.00",
"data_read_per_join": "320"
},
"used_columns": [
"row_number",
"userid",
"cnt",
"timegroupid"
],
"attached_condition": "((`y`.`row_number` <=> 1))",
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "6441.25"
},
"nested_loop": [
{
"table": {
"table_name": "t",
"access_type": "system",
"rows_examined_per_scan": 1,
"rows_produced_per_join": 1,
"filtered": "100.00",
"cost_info": {
"read_cost": "0.00",
"eval_cost": "0.20",
"prefix_cost": "0.00",
"data_read_per_join": "16"
},
"used_columns": [
"@row_number:=0",
"@userid:=''"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 4,
"message": "No tables used"
}
}
}
},
{
"table": {
"table_name": "x",
"access_type": "ALL",
"rows_examined_per_scan": 25725,
"rows_produced_per_join": 25725,
"filtered": "100.00",
"cost_info": {
"read_cost": "1296.25",
"eval_cost": "5145.00",
"prefix_cost": "6441.25",
"data_read_per_join": "602K"
},
"used_columns": [
"userid",
"timegroupid",
"cnt"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 3,
"cost_info": {
"query_cost": "140807.11"
},
"ordering_operation": {
"using_filesort": true,
"grouping_operation": {
"using_temporary_table": true,
"using_filesort": false,
"table": {
"table_name": "templog",
"access_type": "range",
"possible_keys": [
"templog_type_IDX"
],
"key": "templog_type_IDX",
"used_key_parts": [
"type",
"day"
],
"key_length": "13",
"rows_examined_per_scan": 694718,
"rows_pr
oduced_per_join": 25725,
"filtered": "33.33",
"using_index": true,
"cost_info": {
"read_cost": "1863.51",
"eval_cost": "5145.03",
"prefix_cost": "140807.11",
"data_read_per_join": "803K"
},
"used_columns": [
"id",
"userid",
"type",
"day",
"timegroupid"
],
"attached_condition": "((`templog`.`type` = 3) and (`templog`.`day` between '2020-01-01' and '2020-01-20') and (`templog`.`userid` < 771267))"
}
}
}
}
}
}
}
]
}
}
}
}
}
}

是否有其他方法可以优化查询或更改索引顺序或以其他方式重写查询以获得更好的性能?

最佳答案

  1. 不要指望@variables 会像您期望的那样工作。我认为下一个版本将开始禁止它们。

  2. 优化器可以随意丢弃派生表中的 ORDER BY。这会导致错误的结果。对子查询添加较大的 LIMIT 可能可以防止这种情况发生。

  3. 建立并维护一个“汇总表”。这可以显着加快此查询和类似查询的速度。

    CREATE TABLE Summary (
    userid ...,
    timegroupid ...,
    type ...,
    day ...,
    cnt SMALLINT UNSIGNED NOT NULL, -- COUNT(*)
    tottime INT UNSIGNED NOT NULL, -- SUM(activetime)
    PRIMARY KEY(timegroupid, userid, type, day)

但是,如果不更好地了解数据,我无法预测此表是否会明显小于原始表。如果明显变小,这个汇总表就不实用了。

  1. 我添加了另一个标签——关注它以获取更多关于 groupwise-max 的讨论。

关于Mysql group by order by 性能低下,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/55434260/

37 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com