gpt4 book ai didi

python - 比提供的解决方案更快地获取排列索引和索引处的排列

转载 作者:行者123 更新时间:2023-12-05 05:46:21 26 4
gpt4 key购买 nike

多亏了这个answer ,这是我如何获得排列索引和索引排列:

import time


def get_Cl(distinct):
Cl = []
for i in range(1, distinct + 1): # i is distincct
c = [0] * i + [1, 0]
C = [c]
for l in range(2, distinct + 1):
c = [
c[d] * d + c[d + 1] * (distinct - d)
for d in range(i + 1)
] + [0]
C.append(c)
Cl.append(C)
return Cl


def item_index(item, distinct, n_symbols, Cl):
length = len(item)
offset = 0
seen = set()
for i, di in enumerate(item):
for d in range(n_symbols):
if d == di:
break
if d in seen:
# test = Cl[distinct][length - 1 - i][len(seen)]
offset += Cl[distinct][length - 1 - i][len(seen)]
else:
offset += Cl[distinct][length - 1 - i][len(seen) + 1]
seen.add(di)
return offset


def item_at(idx, length, distinct, n_symbols, Cl):
seen = [0] * n_symbols
prefix = [0] * length
used = 0
for i in range(length):
for d in range(n_symbols):
if seen[d] != 0:
branch_count = Cl[distinct][length - 1 - i][used]
else:
branch_count = Cl[distinct][length - 1 - i][used + 1]
if branch_count <= idx:
idx -= branch_count
else:
prefix[i] = d
if seen[d] == 0:
used += 1
seen[d] = 1
break
return prefix


if __name__ == "__main__":
start_time = time.time()
Cl = get_Cl(512)
end_time = time.time()
print(f'{(end_time - start_time)} seconds for Cl')
start_time = time.time()
item = item_at(idx=432, length=512, distinct=350, n_symbols=512, Cl=Cl)
end_time = time.time()
print(f'{(end_time - start_time)} seconds for item_at')
print(item)
start_time = time.time()
print(item_index(item=item, distinct=350, n_symbols=512, Cl=Cl))
end_time = time.time()
print(f'{(end_time - start_time)} seconds for item_index')
356.3069865703583 seconds for Cl
2.5428783893585205 seconds for item_at
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 351, 458]
432
0.025868892669677734 seconds for item_index

它工作正常,除非数字变大,然后它变得非常慢。想知道是否可以像 this 这样改进此代码答案是计算所有排列的同一个慢函数的改进版本?

我在单独的一行中得到 Cl 的原因是对于固定的 distinct 将有数千次调用 item_atitem_index,因此如果 distinct 相同,则 Cl 相同,因此无需为每个 item_atitem_index.

更新:答案的测试结果

0.008994340896606445 seconds for item_at
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 347, 348, 344, 345, 346, 349]
432
0.006995677947998047 seconds for item_index

最佳答案

在这个答案中,我将演示两个可以提高 item_atitem_index 速度的修改。

在我们开始之前,让我们初始化 Cl 表,以处理 distinct=200

的调用
def get_Cl(length, distinct):
i = distinct
c = [0] * i + [1, 0]
C = [c]
for l in range(2, length+1):
c = [
c[d] * d + c[d + 1] * (i - d)
for d in range(i + 1)
] + [0]
C.append(c)
return C;

Cl = {200:get_Cl(300, 200)}

修改item_index

请注意,item_index 的内部循环只是将 offset 增加不依赖于 d in seen 但不依赖于 in 的值d 本身。如果我们事先知道有多少次 d in seen 将是 True。因此,让我们以跟踪数组 seen_before[d] 中在 d 之前看到的值的数量的方式重写代码。

import numpy as np
def item_index_bs(item, distinct, n_symbols, Cl):
length = len(item)
offset = 0
seen = set()
seen_before = np.zeros(n_symbols, dtype=np.uint64)
for i, di in enumerate(item):
offset += Cl[distinct][length - 1 - i][len(seen)] * int(seen_before[di]) \
+ Cl[distinct][length - 1 - i][len(seen) + 1] * int(di - seen_before[di]);
if di not in seen:
seen.add(di)
seen_before[di+1:] += 1;

return offset

这可以用

来测试
pp = item_at(256, 300, 200, 300, Cl)
item_index_factored(pp, 200, 300, Cl) # 1.8ms
item_index(pp, 200, 300, Cl) # 5.39ms

修改item_at

对于 item_at,我们不能像 item_index 那样简单地对术语进行分组,但我们可以潜在地跳过一些迭代,比如 idx如果看到该项目则减少a,否则减少b,因此最多减少max(a,b) 并且至少需要 idx//max(a,b) 才能找到要使用的数字。然后我们通过将 ab 乘以它们各自的系数来进行更新。

def item_at_skip(idx, length, distinct, n_symbols, Cl):
seen = [0] * n_symbols;
prefix = [0] * length
used = 0
for i in range(length):
a = Cl[distinct][length - 1 - i][used];
b = Cl[distinct][length - 1 - i][used + 1]
c = idx // max(a,b) # d will be at least c
ac = sum(seen[:c]) # the number of time a is subtracted
idx -= a * ac + b * (c - ac);
for d in range(c, n_symbols):
if seen[d] != 0:
branch_count = a
else:
branch_count = b
if branch_count <= idx:
idx -= branch_count
else:
prefix[i] = d
if seen[d] == 0:
used += 1
seen[d] = 1
break
return prefix
assert item_at_skip(10**200, 300, 200, 300, Cl) == item_at(10**200, 300, 200, 300, Cl)
item_at_skip(10**200, 300, 200, 300, Cl) # 3.16ms
item_at(10**200, 300, 200, 300, Cl) # 6.25ms

关于python - 比提供的解决方案更快地获取排列索引和索引处的排列,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/71207863/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com