gpt4 book ai didi

php - 提高(查询)性能

转载 作者:可可西里 更新时间:2023-11-01 08:26:51 26 4
gpt4 key购买 nike

我有一个数据库,其中包含大量记录(n_building/n_residence 表中的数以千计,buildinggeo 表中的数以百万计)。这是数据库的简化版本:

Simplified Database Schema

这是导出到 SQL 时的样子:

-- District lookup table. Every row references a row in `municipality`
-- (defined elsewhere) and is deleted/updated in cascade with it.
CREATE TABLE IF NOT EXISTS `district` (
    `districtid`     INT(20)      NOT NULL COMMENT 'cbs_wijk_cd',
    `description`    VARCHAR(255) NOT NULL COMMENT 'cbs_wijk_oms',
    `municipalityid` INT(20)      NOT NULL COMMENT 'FK gemeente',
    PRIMARY KEY (`districtid`),
    -- Backing index for fk_district_municipality.
    INDEX `wijk_gemeente_fk` (`municipalityid` ASC),
    CONSTRAINT `fk_district_municipality`
        FOREIGN KEY (`municipalityid`)
        REFERENCES `municipality` (`municipalityid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
)
ENGINE = InnoDB;

-- Neighborhood lookup table. Every row references both a `district` and a
-- `municipality` (the latter defined elsewhere) and follows them in cascade.
CREATE TABLE IF NOT EXISTS `neighborhood` (
    `neighborhoodid` INT(20)      NOT NULL COMMENT 'cbs_buurt_cd',
    `description`    VARCHAR(255) NOT NULL COMMENT 'cbs_buurt_oms',
    `districtid`     INT(20)      NOT NULL COMMENT 'FK wijk',
    `municipalityid` INT(20)      NOT NULL COMMENT 'FK gemeente',
    PRIMARY KEY (`neighborhoodid`),
    -- Backing indexes for the two foreign keys below.
    INDEX `buurt_gemeente_fk` (`municipalityid` ASC),
    INDEX `buurt_wijk_fk` (`districtid` ASC),
    -- No ASC/DESC here: a FULLTEXT index has no sort order, and MySQL 8.0
    -- rejects an explicit one with error 1221 ("Incorrect usage of
    -- spatial/fulltext/hash index and explicit index order").
    FULLTEXT INDEX `index_neighborhood_description` (`description`),
    CONSTRAINT `fk_neighborhood_municipality`
        FOREIGN KEY (`municipalityid`)
        REFERENCES `municipality` (`municipalityid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE,
    CONSTRAINT `fk_neighborhood_district`
        FOREIGN KEY (`districtid`)
        REFERENCES `district` (`districtid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
)
ENGINE = InnoDB;

-- One row per building; each building belongs to exactly one neighborhood
-- and is deleted/updated in cascade with it.
CREATE TABLE IF NOT EXISTS `n_building` (
    `buildingid`       BIGINT(20) NOT NULL,
    `neighborhoodid`   INT(10)    NOT NULL,
    `constructionyear` INT(4)     NOT NULL,
    `height`           INT(3)     NOT NULL DEFAULT 9,
    `multifamily`      TINYINT(1) NOT NULL DEFAULT 0,
    PRIMARY KEY (`buildingid`),
    -- Serves both the neighborhoodid filter and fk_building_neighborhood
    -- (neighborhoodid is the leftmost column). The former single-column
    -- index on neighborhoodid was a left-prefix duplicate of this one and
    -- only added write overhead, so it has been dropped.
    INDEX `index_neighborhoodid_buildingid` (`neighborhoodid` ASC, `buildingid` ASC),
    CONSTRAINT `fk_building_neighborhood`
        FOREIGN KEY (`neighborhoodid`)
        REFERENCES `neighborhood` (`neighborhoodid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
)
ENGINE = InnoDB;

-- Coordinate points belonging to a building, keyed by (buildingid, order).
-- NOTE(review): `order` presumably is the position of the point within the
-- building outline -- confirm. It is a reserved word and must stay quoted.
CREATE TABLE IF NOT EXISTS `n_buildinggeo` (
    `buildingid` BIGINT(20)    NOT NULL,
    `order`      INT(5)        NOT NULL,
    `lat`        DECIMAL(11,8) NOT NULL,
    `lon`        DECIMAL(11,8) NOT NULL,
    -- buildingid leads the primary key, so lookups by building need no
    -- additional index.
    PRIMARY KEY (`buildingid`, `order`),
    CONSTRAINT `fk_buildinggeo_building`
        FOREIGN KEY (`buildingid`)
        REFERENCES `n_building` (`buildingid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
)
ENGINE = InnoDB;

-- One row per residence; each residence belongs to exactly one building and
-- is deleted/updated in cascade with it.
-- Only the columns of the simplified schema are listed here.
CREATE TABLE IF NOT EXISTS `n_residence` (
    `residenceid` BIGINT(20)    NOT NULL,
    `buildingid`  BIGINT(20)    NOT NULL,
    `geolat`      DECIMAL(11,8) NOT NULL,
    `geolon`      DECIMAL(11,8) NOT NULL,
    PRIMARY KEY (`residenceid`),
    -- Backing index for fk_residence_building and for joins on buildingid.
    INDEX `fk_residence_building_idx` (`buildingid` ASC),
    -- Composite index for latitude/longitude range filters. It also covers
    -- every lookup on geolat alone, so the former single-column index on
    -- geolat was a left-prefix duplicate and has been dropped.
    -- The index name (including its "geoloat" spelling) is kept unchanged so
    -- existing references to it keep working.
    INDEX `index_geoloat_geolon_residenceid` (`geolat` ASC, `geolon` ASC, `residenceid` ASC),
    -- Not redundant: geolon is not the leftmost column of the index above.
    INDEX `index_geolon` (`geolon` ASC),
    CONSTRAINT `fk_residence_building`
        FOREIGN KEY (`buildingid`)
        REFERENCES `n_building` (`buildingid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
)
ENGINE = InnoDB;

-- Optional extra data for a residence: residenceid is both the primary key
-- and a foreign key, so there is at most one row per residence.
-- All data columns are nullable.
CREATE TABLE IF NOT EXISTS `n_vabidata` (
    `residenceid` BIGINT(20)   NOT NULL,
    `index`       FLOAT        NULL COMMENT ' ',
    `indexdate`   VARCHAR(25)  NULL,
    `type`        VARCHAR(100) NULL,
    `subtype`     VARCHAR(150) NULL,
    `rooftype`    VARCHAR(50)  NULL,
    PRIMARY KEY (`residenceid`),
    CONSTRAINT `fk_vabidata_residence`
        FOREIGN KEY (`residenceid`)
        REFERENCES `n_residence` (`residenceid`)
        ON DELETE CASCADE
        ON UPDATE CASCADE
)
ENGINE = InnoDB;

我的目标是像这样创建该数据库内容的特定部分的 JSON 表示(这显然是匿名数据):

[
{
"buildingid": "632100000000000",
"buurtid": "6320103",
"constructionyear": "1969",
"height": "9",
"multifamily": "0",
"gemeenteid": "632",
"geo": [
{
"lat": "52.000",
"lon": "4.000"
},
{
"lat": "52.000",
"lon": "4.000"
},
{
"lat": "52.000",
"lon": "4.000"
},
{
"lat": "52.000",
"lon": "4.000"
},
{
"lat": "52.000",
"lon": "4.000"
}
],
"res": [
{
"residenceid": "632010000000000",
"surface": "159",
"postalcode": "3400AA",
"streetname": "Streetname",
"housenumber": "00",
"clusternr": "6320103533",
"owner": "onbekend",
"usageelec": "2463",
"usagegas": "2006",
"nomupd": "0",
"cpwin": "0",
"cpble": "0",
"enet": "0",
"gnet": "0",
"type": null
}
]
}
]

有两种过滤数据库的方法:通过 neighborhoodid(该社区内的所有建筑物等)或通过边界框(该边界框内的所有建筑物等)。起初我决定以一种非常简单的方式来实现:

// Emits the building list as JSON using one query for the buildings plus two
// follow-up queries (outline points, residences) per building.
//
// Accepted routes (path segments 2..5):
//   <minlat>/<minlon>/<maxlat>/<maxlon>  all numeric -> bounding-box filter
//   district/<id>                        numeric id  -> filter on neighborhoodid
// Anything else produces no output.
$path2 = Config::Path(2);//minlat
$path3 = Config::Path(3);//minlon
$path4 = Config::Path(4);//maxlat
$path5 = Config::Path(5);//maxlon

// Every value interpolated into SQL below has passed is_numeric() first.
$isBoundingBox = $path2 && is_numeric($path2) && $path3 && is_numeric($path3) &&
    $path4 && is_numeric($path4) && $path5 && is_numeric($path5);
$isNeighborhood = ($path2 == "district" && $path3 && is_numeric($path3));

if ($isBoundingBox || $isNeighborhood) {
    // Fix: this branch used to test for the keyword "neighborhood", which the
    // guard above never lets through. As a result the id filter was dead code
    // and "district/<id>" requests ran the bounding-box query with the literal
    // 'district' as latitude. Branch on the same flag the guard uses instead.
    if ($isNeighborhood) {
        // The join on n_residence plus GROUP BY keeps one row per building
        // that has at least one residence.
        $buildings = DBUtils::FetchQuery("
SELECT b.`buildingid`, b.`neighborhoodid` AS buurtid, b.`constructionyear`,
b.`height`, b.`multifamily`, n.`municipalityid` AS gemeenteid
FROM `neighborhood` n
INNER JOIN `n_building` b ON b.`neighborhoodid` = n.`neighborhoodid`
INNER JOIN `n_residence` r ON r.`buildingid` = b.`buildingid`
WHERE b.`neighborhoodid` = '$path3'
GROUP BY b.`buildingid`;
");
    } else {
        // Bounding box: a building qualifies when at least one of its
        // residences lies inside the box.
        $buildings = DBUtils::FetchQuery("
SELECT b.`buildingid`, b.`neighborhoodid` AS buurtid, b.`constructionyear`,
b.`height`, b.`multifamily`, n.`municipalityid` AS gemeenteid
FROM `neighborhood` n
INNER JOIN `n_building` b ON b.`neighborhoodid` = n.`neighborhoodid`
INNER JOIN `n_residence` r ON r.`buildingid` = b.`buildingid`
WHERE r.`geolat` >= '$path2'
AND r.`geolon` >= '$path3'
AND r.`geolat` <= '$path4'
AND r.`geolon` <= '$path5'
GROUP BY b.`buildingid`;
");
    }

    if ($buildings && count($buildings) > 0) {
        // The list is only modified in place, so its length is loop-invariant.
        $buildingCount = count($buildings);

        for ($i = 0; $i < $buildingCount; $i++) {
            $building = $buildings[$i];

            $buildinggeo = DBUtils::FetchQuery("
SELECT bg.`lat`, bg.`lon`
FROM `n_buildinggeo` bg
WHERE bg.`buildingid` = '$building[buildingid]';
");

            // Residences are only fetched for buildings that have outline
            // points; a building without them gets neither 'geo' nor 'res'.
            if ($buildinggeo && count($buildinggeo) > 0) {
                $buildings[$i]['geo'] = $buildinggeo;

                $buildingresidences = DBUtils::FetchQuery("
SELECT r.`residenceid`, r.`surface`, r.`postalcode`, r.`streetname`,
r.`housenumber`, r.`clusternr`, r.`owner`, r.`usageelec`,
r.`usagegas`, r.`nomupd`, r.`cpwin`, r.`cpble`, r.`enet`,
r.`gnet`, v.`type`
FROM `n_residence` r
LEFT OUTER JOIN `n_vabidata` v ON r.`residenceid` = v.`residenceid`
WHERE r.`buildingid` = '$building[buildingid]';
");

                if ($buildingresidences && count($buildingresidences) > 0) {
                    $buildings[$i]['res'] = $buildingresidences;
                }
            }
        }

        echo json_encode($buildings);
    }
}

后来我决定在单个查询中获取所有建筑物/住宅/vabidata 信息,并从中创建所需的 JSON 结构,因为每个请求(> 5 个建筑物)中的大部分时间都花在获取住宅数据上。

// Single-query variant: fetches building, residence and vabidata columns in
// one joined result set, regroups the rows per building in PHP, then loads
// the outline points with one extra query per building and echoes the JSON.
$path2 = Config::Path(2);//minlat
$path3 = Config::Path(3);//minlon
$path4 = Config::Path(4);//maxlat
$path5 = Config::Path(5);//maxlon

// Route flags: four numeric segments (bounding box) or "district" + numeric id.
$hasBoundingBox = $path2 && is_numeric($path2) && $path3 && is_numeric($path3) &&
    $path4 && is_numeric($path4) && $path5 && is_numeric($path5);
$hasDistrictId = ($path2 == "district" && $path3 && is_numeric($path3));

if ($hasBoundingBox || $hasDistrictId) {
    if ($path2 == "district") {
        // Filter on the id given in the third path segment.
        $results = DBUtils::FetchQuery("
SELECT b.`buildingid`, b.`neighborhoodid`, b.`constructionyear`,
b.`height`, b.`multifamily`, n.`municipalityid`, r.`residenceid`,
r.`surface`, r.`postalcode`, r.`streetname`, r.`housenumber`,
r.`clusternr`, r.`owner`, r.`usageelec`, r.`usagegas`,
r.`nomupd`, r.`cpwin`, r.`cpble`, r.`enet`, r.`gnet`,
v.`type`
FROM `neighborhood` n
INNER JOIN `n_building` b ON b.`neighborhoodid` = n.`neighborhoodid`
INNER JOIN `n_residence` r ON r.`buildingid` = b.`buildingid`
LEFT OUTER JOIN `n_vabidata` v ON r.`residenceid` = v.`residenceid`
WHERE b.`neighborhoodid` = '$path3';
");
    } else {
        // Filter on residences whose coordinates fall inside the box.
        $results = DBUtils::FetchQuery("
SELECT b.`buildingid`, b.`neighborhoodid`, b.`constructionyear`,
b.`height`, b.`multifamily`, n.`municipalityid`, r.`residenceid`,
r.`surface`, r.`postalcode`, r.`streetname`, r.`housenumber`,
r.`clusternr`, r.`owner`, r.`usageelec`, r.`usagegas`,
r.`nomupd`, r.`cpwin`, r.`cpble`, r.`enet`, r.`gnet`,
v.`type`
FROM `neighborhood` n
INNER JOIN `n_building` b ON b.`neighborhoodid` = n.`neighborhoodid`
INNER JOIN `n_residence` r ON r.`buildingid` = b.`buildingid`
LEFT OUTER JOIN `n_vabidata` v ON r.`residenceid` = v.`residenceid`
WHERE r.`geolat` >= '$path2'
AND r.`geolon` >= '$path3'
AND r.`geolat` <= '$path4'
AND r.`geolon` <= '$path5';
");
    }

    if ($results && count($results) > 0) {
        // Regroup the flat rows: one entry per building, keyed by buildingid,
        // each collecting its residences under 'res'.
        $grouped = array();

        foreach ($results as $row) {
            $id = $row['buildingid'];

            // First row seen for this building: create its header entry.
            if (!isset($grouped[$id])) {
                $grouped[$id] = array(
                    "buildingid" => $row['buildingid'],
                    "buurtid" => $row['neighborhoodid'],
                    "constructionyear" => $row['constructionyear'],
                    "height" => $row['height'],
                    "multifamily" => $row['multifamily'],
                    "gemeenteid" => $row['municipalityid'],
                    "res" => array()
                );
            }

            // Every row contributes exactly one residence.
            $grouped[$id]['res'][] = array(
                "residenceid" => $row['residenceid'],
                "surface" => $row['surface'],
                "postalcode" => $row['postalcode'],
                "streetname" => $row['streetname'],
                "housenumber" => $row['housenumber'],
                "clusternr" => $row['clusternr'],
                "owner" => $row['owner'],
                "usageelec" => $row['usageelec'],
                "usagegas" => $row['usagegas'],
                "nomupd" => $row['nomupd'],
                "cpwin" => $row['cpwin'],
                "cpble" => $row['cpble'],
                "enet" => $row['enet'],
                "gnet" => $row['gnet'],
                "type" => $row['type']
            );
        }

        // Drop the buildingid keys so json_encode emits a plain list.
        $buildings = array_values($grouped);

        // Attach outline points; buildings without any get no 'geo' key.
        foreach ($buildings as $position => $building) {
            $buildinggeo = DBUtils::FetchQuery("
SELECT bg.`lat`, bg.`lon`
FROM `n_buildinggeo` bg
WHERE bg.`buildingid` = '$building[buildingid]';
");

            if ($buildinggeo && count($buildinggeo) > 0) {
                $buildings[$position]['geo'] = $buildinggeo;
            }
        }

        echo json_encode($buildings);
    }
}

但是这种方法似乎比以前的方法慢了 30-70%。我的问题是:你们中的任何人都可以找到原因(以及可能的解决方案)为什么一个/两个查询都表现得如此糟糕吗?如果您有任何疑问或需要任何其他信息,请询问。

编辑

这是对第二个查询(一体式)的边界框版本的 EXPLAIN 的结果:

EXPLAIN result

最佳答案

WHERE  b.`neighborhoodid` = '$path3'
GROUP BY b.`buildingid`

b 需要 INDEX(neighborhoodid, buildingid) -- 按此顺序
r 需要 INDEX(buildingid)
r 需要 INDEX(geolat, geolon, residenceid) -- 按此顺序

(请提供 SHOW CREATE TABLE 的输出,以便我们可以看到您有哪些索引。)

WHERE  r.`geolat` >= '$path2'
AND r.`geolon` >= '$path3'
AND r.`geolat` <= '$path4'
AND r.`geolon` <= '$path5'

按目前的写法无法优化。参见 http://mysql.rjweb.org/doc.php/latlng 了解如何处理按纬度/经度进行的大量搜索。

关于php - 提高(查询)性能,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34592922/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com