gpt4 book ai didi

MongoDB服务器卡住 - 大量集合

转载 作者:IT老高 更新时间:2023-10-28 12:32:08 28 4
gpt4 key购买 nike

我们有大型 MongoDB 数据库(大约 140 万个集合)、MongoDB 3.0、引擎 RocksDB、操作系统 Ubuntu 14.04。

该数据库运行在一台拥有 16 个内核和 108 GB RAM 的虚拟机 (VMware vCloud) 上(当前 MongoDB 占用 70 GB 内存,未使用交换空间)。

生产设置选项:

  • 专用分区上的数据 - XFS 文件系统
  • transparent_hugepage/enabled 设置为 never
  • transparent_hugepage/defrag 设置为 never

数据库统计:

{
"db" : "ctp",
"collections" : 1369486,
"objects" : 20566852,
"avgObjSize" : 1126.82749999854,
"dataSize" : 23175294422,
"storageSize" : 23231888384,
"numExtents" : 0,
"indexes" : 6686175,
"indexSize" : 685981393,
"ok" : 1
}

样本集合大小:

{
"ns" : "ctp._cf123_ct49_dfc-r_dtc-r_tof2_groupat",
"count" : 33,
"size" : 38172,
"avgObjSize" : 1156,
"storageSize" : 38144,
"capped" : false,
"nindexes" : 5,
"totalIndexSize" : 6312,
"indexSizes" : {
"_id_" : 18,
"exAt" : 16,
"unique" : 6246,
"_smp" : 10,
"_smpdf" : 22
},
"ok" : 1
}

{
"ns" : "ctp._afpoznan123_atlondyn49_df2016-09_dt2016-09_tof2_groupdfdt",
"count" : 188,
"size" : 208677,
"avgObjSize" : 1109,
"storageSize" : 208640,
"capped" : false,
"nindexes" : 5,
"totalIndexSize" : 7945,
"indexSizes" : {
"_id_" : 2845,
"exAt" : 256,
"_smp" : 160,
"_smpdf" : 352,
"unique" : 4332
},
"ok" : 1
}
{
"ns" : "ctp._cf123_ct42_dfc-r_dtc-r_tof2_groupat",
"count" : 27,
"size" : 30400,
"avgObjSize" : 1125,
"storageSize" : 30208,
"capped" : false,
"nindexes" : 5,
"totalIndexSize" : 84,
"indexSizes" : {
"_id_" : 18,
"exAt" : 16,
"unique" : 18,
"_smp" : 10,
"_smpdf" : 22
},
"ok" : 1
}

有一个脚本每 5 分钟运行一次:它向这些集合写入数据,如果目标集合不存在,则创建新的集合(集合名称基于这些集合中的数据)并为其创建索引。

我们注意到,在向集合写入数据期间,这台服务器会出现卡顿,每次卡顿可能持续 5 到 60 秒。

有没有人遇到过这个问题并可以帮助我们?

以下是卡住时刻的一些日志:

db.currentOp();

"opid" : 22717868,
"active" : false,
"op" : "query",
"ns" : "ctp._cf115_atboma25_dfc-r_dtc-r_tof2_groupdfym",
"query" : {
"$query" : {
"T#df" : {
"$lt" : "2017-02-28"
}
},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33832",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(22004831)
}
}
}
},
{
"desc" : "conn135907",
"threadId" : "0xc3e5d64e0",
"connectionId" : 135907,
"opid" : 22719375,
"active" : true,
"secs_running" : 0,
"microsecs_running" : NumberLong(223601),
"op" : "query",
"ns" : "top_search.top_searches",
"query" : {
"$msg" : "query not recording (too large)"
},
"planSummary" : "IXSCAN { T#df: 1, T#dt: 1 }",
"client" : "192.168.1.33:33648",
"numYields" : 170,
"locks" : {
"Global" : "r",
"Database" : "r",
"Collection" : "r"
},
"waitingForLock" : false,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(342)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(171)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(171)
}
}
}
},
{
"desc" : "conn135959",
"threadId" : "0x10d4445260",
"connectionId" : 135959,
"opid" : 22718533,
"active" : false,
"op" : "query",
"ns" : "ctp._afoxford-house23_attamarindo32_dfc-r_dtc-r_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:34022",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(15003580)
}
}
}
},
{
"desc" : "conn135829",
"threadId" : "0x10d4445740",
"connectionId" : 135829,
"opid" : 22717923,
"active" : false,
"op" : "query",
"ns" : "ctp._ct123_dfc-r_dtc-r_tof2_groupdfym",
"query" : {
"$query" : {
"T#df" : {
"$lt" : "2017-02-28"
}
},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33026",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(21004810)
}
}
}
},
{
"desc" : "conn135781",
"threadId" : "0x2d678e0",
"connectionId" : 135781,
"opid" : 22718920,
"active" : false,
"op" : "query",
"ns" : "ctp._cf1147_atrostock36_df2016-06_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:60874",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(12002770)
}
}
}
},
{
"desc" : "conn135870",
"threadId" : "0xd04ed5d40",
"connectionId" : 135870,
"opid" : 22719172,
"active" : false,
"op" : "query",
"ns" : "ctp._cf61_atpristina131_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33369",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(7001590)
}
}
}
},
{
"desc" : "conn135687",
"threadId" : "0xc3e5d7380",
"connectionId" : 135687,
"opid" : 22717925,
"active" : false,
"op" : "query",
"ns" : "ctp._cf105_athana156_df2016-06_dt2016-06_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:60022",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(21003871)
}
}
}
},
{
"desc" : "conn135754",
"threadId" : "0xd04ed5860",
"connectionId" : 135754,
"opid" : 22718485,
"active" : false,
"op" : "query",
"ns" : "ctp._cf5_atdhaka1113_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:60603",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(15503084)
}
}
}
},
{
"desc" : "conn135644",
"threadId" : "0xc3e5d9c20",
"connectionId" : 135644,
"opid" : 22719073,
"active" : false,
"op" : "query",
"ns" : "ctp._afgenua71_ataarhus37_dfc-r_dtc-r_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:59698",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(7501602)
}
}
}
},
{
"desc" : "conn135891",
"threadId" : "0xd04ed7a80",
"connectionId" : 135891,
"opid" : 22719284,
"active" : false,
"op" : "query",
"ns" : "ctp._attianjin30_dfc-r_dtc-r_tof2_groupcf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:33530",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(3000658)
}
}
}
},
{
"desc" : "conn135673",
"threadId" : "0xd04ed6220",
"connectionId" : 135673,
"opid" : 22718185,
"active" : false,
"op" : "query",
"ns" : "ctp._afwroclaw123_atlondyn49_df2016-06_dt2016-06_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:59925",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(16503737)
}
}
}
},
{
"desc" : "conn135989",
"threadId" : "0x10d44443c0",
"connectionId" : 135989,
"opid" : 22719240,
"active" : false,
"op" : "query",
"ns" : "ctp._cf28_atmarakesz93_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:34367",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(4500947)
}
}
}
},
{
"desc" : "conn135410",
"threadId" : "0x2d66220",
"connectionId" : 135410,
"opid" : 22717853,
"active" : true,
"secs_running" : 22,
"microsecs_running" : NumberLong(22406019),
"op" : "query",
"ns" : "ctp.$cmd",
"query" : {
"createIndexes" : "_cf71_df2016-07_dt2016-11_tof2_groupct",
"indexes" : [
{
"key" : {
"expireAt" : 1
},
"name" : "exAt",
"background" : true,
"expireAfterSeconds" : 0
}
]
},
"client" : "0.0.0.0:0",
"numYields" : 0,
"locks" : {
"Global" : "w",
"Database" : "W"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(376227),
"w" : NumberLong(15477)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(180375),
"w" : NumberLong(15476),
"W" : NumberLong(1)
},
"acquireWaitCount" : {
"W" : NumberLong(1)
},
"timeAcquiringMicros" : {
"W" : NumberLong(22004935)
}
},
"Collection" : {
"acquireCount" : {
"r" : NumberLong(180375),
"w" : NumberLong(15476)
}
}
}
},
{
"desc" : "conn135961",
"threadId" : "0x10d4442b60",
"connectionId" : 135961,
"opid" : 22718537,
"active" : false,
"op" : "query",
"ns" : "ctp._cf5_atattawapiskat23_dfc-r_dtc-r_tof2_groupaf",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1)
}
},
"client" : "192.168.1.33:34029",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(15002978)
}
}
}
},
{
"desc" : "conn135905",
"threadId" : "0xc3e5d6000",
"connectionId" : 135905,
"opid" : 22718186,
"active" : false,
"op" : "query",
"ns" : "ctp._afwarszawa123_atdubrownik61_df2016-08_dt2016-08_tof2_groupdfdt",
"query" : {
"$query" : {},
"$orderby" : {
"T#mp" : NumberLong(1),
"T#df" : NumberLong(1)
}
},
"client" : "192.168.1.33:33638",
"numYields" : 0,
"locks" : {
"Global" : "r",
"Database" : "r"
},
"waitingForLock" : true,
"lockStats" : {
"Global" : {
"acquireCount" : {
"r" : NumberLong(2)
}
},
"Database" : {
"acquireCount" : {
"r" : NumberLong(1)
},
"acquireWaitCount" : {
"r" : NumberLong(1)
},
"timeAcquiringMicros" : {
"r" : NumberLong(16503305)
}
}
}
}
]

db.serverStatus()["rocksdb"];

{
"stats" : [
"",
"** Compaction Stats [default] **",
"Level Files Size(MB) Score Read(GB) Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) Comp(cnt) Avg(sec) Stall(cnt) KeyIn KeyDrop",
"---------------------------------------------------------------------------------------------------------------------------------------------------------------------",
" L0 0/0 0.00 0.0 0.0 0.0 0.0 1.4 1.4 0.0 0.0 0.0 120.1 12 39 0.312 0 0 0",
" L4 0/0 0.00 0.0 1.8 1.8 0.0 1.7 1.7 0.0 1.0 102.0 99.7 18 11 1.606 7 21M 153K",
" L5 15/0 620.47 1.0 6.6 1.4 5.2 5.5 0.3 0.0 3.9 44.4 37.0 152 25 6.086 0 110M 840K",
" L6 106/0 6401.43 0.0 3.5 0.3 3.3 3.3 -0.0 0.0 12.6 25.9 23.7 140 7 20.057 0 162M 14M",
" Sum 121/0 7021.90 0.0 11.9 3.4 8.5 11.9 3.4 0.0 8.3 37.8 37.8 322 82 3.932 7 295M 15M",
" Int 0/0 0.00 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0 0 0.000 0 0 0",
"Flush(GB): cumulative 1.429, interval 0.000",
"Stalls(count): 0 level0_slowdown, 0 level0_slowdown_with_compaction, 0 level0_numfiles, 0 level0_numfiles_with_compaction, 0 pending_compaction_bytes, 0 memtable_compaction, 7 leveln_slowdown_soft, 0 leveln_slowdown_hard",
"",
"** DB Stats **",
"Uptime(secs): 34952.0 total, 0.2 interval",
"Cumulative writes: 4990K writes, 17M keys, 4989K batches, 1.0 writes per batch, ingest: 2.02 GB, 0.06 MB/s",
"Cumulative WAL: 4990K writes, 0 syncs, 4990122.00 writes per sync, written: 2.02 GB, 0.06 MB/s",
"Cumulative compaction: 11.90 GB write, 0.35 MB/s write, 11.90 GB read, 0.35 MB/s read, 322.4 seconds",
"Cumulative stall: 00:00:3.548 H:M:S, 0.0 percent",
"Interval writes: 0 writes, 0 keys, 0 batches, 0.0 writes per batch, ingest: 0.00 MB, 0.00 MB/s",
"Interval WAL: 0 writes, 0 syncs, 0.00 writes per sync, written: 0.00 MB, 0.00 MB/s",
"Interval compaction: 0.00 GB write, 0.00 MB/s write, 0.00 GB read, 0.00 MB/s read, 0.0 seconds",
"Interval stall: 00:00:0.000 H:M:S, 0.0 percent"
],
"num-immutable-mem-table" : "0",
"mem-table-flush-pending" : "0",
"compaction-pending" : "0",
"background-errors" : "0",
"cur-size-active-mem-table" : "33MB",
"cur-size-all-mem-tables" : "33MB",
"num-entries-active-mem-table" : "185495",
"num-entries-imm-mem-tables" : "0",
"estimate-table-readers-mem" : "91MB",
"num-snapshots" : "1",
"oldest-snapshot-time" : "1465911051",
"num-live-versions" : "1",
"total-live-recovery-units" : 60,
"block-cache-usage" : "34GB",
"transaction-engine-keys" : NumberLong(4210),
"transaction-engine-snapshots" : NumberLong(1),
"thread-status" : []
}

db.serverStatus()['globalLock'];

{
"totalTime" : NumberLong(34952090000),
"currentQueue" : {
"total" : 57,
"readers" : 56,
"writers" : 1
},
"activeClients" : {
"total" : 124,
"readers" : 57,
"writers" : 1
}
}

mongostat 的截图(原图缺失): screen from mongostat

此致敬礼

最佳答案

不断创建新集合没有意义,这是一个工程设计缺陷。如此数量的集合肯定会让数据库服务器卡住,因为它不得不遍历可用集合的列表,检查某个集合是否在其中。另外,我认为这还涉及元数据更新等操作。

从操作日志中可以清楚地看出,构建索引耗费了大量时间:

"query" : {
"createIndexes" : "_cf71_df2016-07_dt2016-11_tof2_groupct",
"indexes" : [
{
"key" : {
"expireAt" : 1
},
"name" : "exAt",
"background" : true,
"expireAfterSeconds" : 0
}
]
}

此外,创建新的命名空间和构建索引都必须先获取锁,而获取锁本身也需要时间。同样,像 "expireAfterSeconds": 0 这样的参数在这种规模下并不明智。基本上,您相当于每 60 秒要运行 140 万个计时器来查找并清理过期记录,同时还要保证索引被重建(请参阅 mongostat 中的那些删除操作)。

请考虑做出调整,以减少集合数量,或者将集合拆分到多个部署中。您还可以删除不活跃集合上的索引。此外,明智的做法是删除所有空的和未使用的集合。

要想立即缓解问题,您必须先找到当前的瓶颈:RAM、CPU 还是 IOPS。您还有富余的内存,可以把它分配给 MongoDB,这有助于缓存更多数据,避免不必要的磁盘读取。请使用 iostat 测量 IOPS,查看磁盘的繁忙程度。

关于MongoDB服务器卡住 - 大量集合,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37813710/

28 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com