python - 比提供的解决方案更快地获取排列索引和索引处的排列-6ren

python - 比提供的解决方案更快地获取排列索引和索引处的排列

转载作者：行者123 更新时间：2023-12-05 05:46:21

多亏了这个 answer，下面是我获取排列的索引、以及根据索引获取排列的方法：

import time


def get_Cl(distinct):
    """Build the completion-count tables for every target number of distinct symbols.

    ``distinct`` plays three roles here: it is the largest target
    distinct-count that gets a table, the alphabet size used in the
    recurrence, and the number of rows of each table (remaining positions
    ``0 .. distinct - 1``).

    Returns a list ``Cl`` where ``Cl[k][r][u]`` is the number of ways to fill
    ``r`` remaining positions, given that ``u`` different symbols have
    already been used, so that the finished sequence contains exactly ``k``
    different symbols.  Every row ends with a 0 sentinel so that
    ``row[u + 1]`` is always a valid lookup.

    The outer loop starts at 0 so that ``Cl[k]`` really is the table for
    ``k`` distinct symbols.  Starting at 1 would store that table at
    ``Cl[k - 1]`` and make the ``Cl[distinct]`` lookups in ``item_at`` /
    ``item_index`` silently use the table for ``distinct + 1``.
    """
    n_symbols = distinct
    n_rows = distinct
    Cl = []
    for target in range(distinct + 1):
        # No positions left: valid only when exactly `target` symbols are used.
        row = [0] * target + [1, 0]
        table = [row]
        for _ in range(n_rows - 1):
            # Next symbol is either one of the `used` old ones (count stays)
            # or one of the `n_symbols - used` new ones (count grows by one).
            row = [
                row[used] * used + row[used + 1] * (n_symbols - used)
                for used in range(target + 1)
            ] + [0]
            table.append(row)
        Cl.append(table)
    return Cl


def item_index(item, distinct, n_symbols, Cl):
    """Return the offset of ``item`` obtained by summing skipped branch sizes.

    For each position, every candidate symbol ``0, 1, ...`` that comes before
    the symbol actually stored in ``item`` contributes one entry of
    ``Cl[distinct][remaining]``: column ``n_used`` when the candidate has
    already appeared earlier in ``item``, column ``n_used + 1`` otherwise.
    ``remaining`` is the number of positions after the current one.

    Args:
        item: sequence of symbols (compared against ``range(n_symbols)``).
        distinct: key/index selecting the table inside ``Cl``.
        n_symbols: number of candidate symbols tried at each position.
        Cl: container of count tables, indexed as ``Cl[distinct][row][col]``.

    Returns:
        The accumulated offset (0 for an empty ``item``).
    """
    total = 0
    used_symbols = set()
    remaining = len(item)
    for symbol in item:
        remaining -= 1
        # The set only grows after the scan, so its size is fixed here.
        n_used = len(used_symbols)
        for candidate in range(n_symbols):
            if candidate == symbol:
                break
            column = n_used if candidate in used_symbols else n_used + 1
            total += Cl[distinct][remaining][column]
        used_symbols.add(symbol)
    return total


def item_at(idx, length, distinct, n_symbols, Cl):
    """Return the sequence of ``length`` symbols located at offset ``idx``.

    At each position the candidate symbols ``0 .. n_symbols - 1`` are tried in
    order.  A candidate's branch size is read from
    ``Cl[distinct][remaining]``: column ``n_used`` if the candidate was
    already placed earlier, column ``n_used + 1`` otherwise.  While the
    branch size does not exceed ``idx`` it is subtracted and the next
    candidate is tried; the first candidate whose branch is larger than the
    remaining ``idx`` is written to the result.

    If no candidate is accepted at some position, that position keeps its
    initial value 0.

    Args:
        idx: offset to decode.
        length: number of positions in the result.
        distinct: key/index selecting the table inside ``Cl``.
        n_symbols: number of candidate symbols.
        Cl: container of count tables, indexed as ``Cl[distinct][row][col]``.

    Returns:
        A list of ``length`` integers.
    """
    is_used = [False] * n_symbols
    result = [0] * length
    n_used = 0
    for pos, remaining in enumerate(range(length - 1, -1, -1)):
        for symbol in range(n_symbols):
            column = n_used if is_used[symbol] else n_used + 1
            ways = Cl[distinct][remaining][column]
            if ways > idx:
                # idx falls inside this candidate's branch: take it.
                result[pos] = symbol
                if not is_used[symbol]:
                    is_used[symbol] = True
                    n_used += 1
                break
            idx -= ways
    return result


if __name__ == "__main__":
    # Demo / benchmark: build the tables once, then round-trip one index
    # through item_at and item_index.
    # time.perf_counter() replaces time.time(): it is the monotonic,
    # highest-resolution clock intended for measuring elapsed intervals,
    # whereas time.time() follows the (adjustable) wall clock.
    n_symbols = 512
    length = 512
    distinct = 350

    start_time = time.perf_counter()
    Cl = get_Cl(n_symbols)
    end_time = time.perf_counter()
    print(f'{(end_time - start_time)} seconds for Cl')

    start_time = time.perf_counter()
    item = item_at(idx=432, length=length, distinct=distinct, n_symbols=n_symbols, Cl=Cl)
    end_time = time.perf_counter()
    print(f'{(end_time - start_time)} seconds for item_at')
    print(item)

    start_time = time.perf_counter()
    print(item_index(item=item, distinct=distinct, n_symbols=n_symbols, Cl=Cl))
    end_time = time.perf_counter()
    print(f'{(end_time - start_time)} seconds for item_index')

356.3069865703583 seconds for Cl
2.5428783893585205 seconds for item_at  
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 
325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 351, 458]  
432
0.025868892669677734 seconds for item_index

它工作正常，但当数字变大时会变得非常慢。想知道能否像 this 答案那样改进这段代码？那个答案是对“计算所有排列”的同一个慢函数的改进版本。

我把 Cl 的计算单独放在一行的原因是：对于固定的 distinct，会有数千次对 item_at 和 item_index 的调用；只要 distinct 相同，Cl 就相同，因此无需为每次 item_at 或 item_index 调用重新计算 Cl。

更新:答案的测试结果

0.008994340896606445 seconds for item_at
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 
324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 347, 348, 344, 345, 346, 349]
432
0.006995677947998047 seconds for item_index

最佳答案

在这个答案中，我将演示两个可以提高 item_at 和 item_index 速度的修改。

在开始之前，让我们先初始化 Cl 表，以处理 distinct=200 的调用：

def get_Cl(length, distinct, n_symbols=None):
    """Build the completion-count table for exactly ``distinct`` different symbols.

    ``table[r][u]`` is the number of ways to fill ``r`` remaining positions,
    given that ``u`` different symbols have already been used, so that the
    finished sequence contains exactly ``distinct`` different symbols drawn
    from an alphabet of ``n_symbols`` symbols.  Each row ends with a 0
    sentinel so that ``row[u + 1]`` is always a valid lookup.

    Args:
        length: number of rows to build (remaining positions
            ``0 .. length - 1``); at least one row is always returned.
        distinct: required number of different symbols in a full sequence.
        n_symbols: alphabet size used for the "pick a new symbol" term.
            Defaults to ``distinct``, which reproduces the behaviour of the
            two-argument form.
            NOTE(review): item_at / item_index try ``n_symbols`` candidates
            per position, so the table presumably should be built with that
            same alphabet size -- confirm against the callers.

    Returns:
        A list of rows, each a list of ``distinct + 2`` integers.
    """
    if n_symbols is None:
        n_symbols = distinct
    # No positions left: valid only when exactly `distinct` symbols are used.
    row = [0] * distinct + [1, 0]
    table = [row]
    for _ in range(length - 1):
        # Next symbol is either one of the `used` old ones (count stays)
        # or one of the `n_symbols - used` new ones (count grows by one).
        row = [
            row[used] * used + row[used + 1] * (n_symbols - used)
            for used in range(distinct + 1)
        ] + [0]
        table.append(row)
    return table

# Tables keyed by the distinct-count they were built for, so the
# Cl[distinct] lookups in the functions below resolve by key; here a single
# table with 300 rows for distinct=200.
Cl = {200:get_Cl(300, 200)}

修改`item_index`

请注意，item_index 的内部循环只是给 offset 累加一些值，这些值只取决于 d in seen 是否成立，而不取决于 d 本身。如果我们事先知道 d in seen 为 True 的次数，就可以一次性算出总的增量。因此，让我们重写代码，用数组 seen_before[d] 记录在 d 之前（即小于 d）已经见过的不同取值的数量。

import numpy as np
def item_index_bs(item, distinct, n_symbols, Cl):
    """Return the offset of ``item`` using grouped (multiplied) branch sizes.

    Instead of adding one table entry per skipped candidate, each position
    adds ``row[n_used] * k_seen + row[n_used + 1] * k_new`` where ``k_seen``
    is the number of already-seen symbols smaller than the current symbol
    and ``k_new`` is the number of remaining smaller symbols.

    Args:
        item: sequence of integer symbols, used as indices into an array of
            size ``n_symbols``.
        distinct: key/index selecting the table inside ``Cl``.
        n_symbols: size of the per-symbol counter array.
        Cl: container of count tables, indexed as ``Cl[distinct][row][col]``.

    Returns:
        The accumulated offset (0 for an empty ``item``).
    """
    n_positions = len(item)
    rank = 0
    used = set()
    # below[s] counts the different symbols smaller than s seen so far.
    below = np.zeros(n_symbols, dtype=np.uint64)
    for pos, symbol in enumerate(item):
        row = Cl[distinct][n_positions - 1 - pos]
        n_used = len(used)
        # int(...) converts NumPy scalars so the products stay Python ints.
        seen_skipped = int(below[symbol])
        new_skipped = int(symbol - below[symbol])
        rank += row[n_used] * seen_skipped + row[n_used + 1] * new_skipped
        if symbol in used:
            continue
        used.add(symbol)
        # Every larger symbol now has one more seen symbol below it.
        below[symbol + 1:] += 1

    return rank

可以用下面的代码来测试：

# Decode one index, then encode it back with both implementations.
# The grouped variant defined in this file is named item_index_bs
# (the call previously used the undefined name item_index_factored).
pp = item_at(256, 300, 200, 300, Cl)
item_index_bs(pp, 200, 300, Cl) # 1.8ms
item_index(pp, 200, 300, Cl) # 5.39ms

修改`item_at`

对于 item_at，我们不能像 item_index 那样简单地对各项进行分组，但可以跳过一部分迭代：每次迭代中，如果该符号已经出现过，idx 减少 a，否则减少 b，因此每次迭代最多减少 max(a,b)，至少需要 idx//max(a,b) 次迭代才能找到要使用的符号。于是我们可以一次性跳过这些迭代，并用 a 和 b 分别乘以各自对应的次数来更新 idx。

def item_at_skip(idx, length, distinct, n_symbols, Cl):
    """Return the same sequence as ``item_at`` while skipping leading candidates.

    At each position a candidate's branch size is ``a`` (symbol already
    used) or ``b`` (new symbol).  Each rejected candidate lowers ``idx`` by at
    most ``max(a, b)``, so the first ``idx // max(a, b)`` candidates are
    certain to be rejected and are accounted for in one step; the ordinary
    scan resumes from there.

    The skip is guarded so the result matches ``item_at`` for every input:
    no skip is attempted when both branch sizes are 0 (which would divide by
    zero) or when ``idx`` is not positive, and the skip never goes past
    ``n_symbols`` candidates.

    Args:
        idx: offset to decode.
        length: number of positions in the result.
        distinct: key/index selecting the table inside ``Cl``.
        n_symbols: number of candidate symbols.
        Cl: container of count tables, indexed as ``Cl[distinct][row][col]``.

    Returns:
        A list of ``length`` integers; a position where no candidate is
        accepted keeps its initial value 0.
    """
    seen = [0] * n_symbols
    prefix = [0] * length
    used = 0
    for i in range(length):
        row = Cl[distinct][length - 1 - i]
        a = row[used]      # branch size for an already-used symbol
        b = row[used + 1]  # branch size for a not-yet-used symbol
        largest = max(a, b)
        if largest > 0 and idx > 0:
            # These many leading candidates are guaranteed to be rejected.
            skip = min(idx // largest, n_symbols)
        else:
            skip = 0
        reused = sum(seen[:skip])  # skipped candidates that cost `a` each
        idx -= a * reused + b * (skip - reused)
        for d in range(skip, n_symbols):
            branch_count = a if seen[d] else b
            if branch_count <= idx:
                idx -= branch_count
            else:
                prefix[i] = d
                if not seen[d]:
                    used += 1
                    seen[d] = 1
                break
    return prefix
# Sanity check: the skipping variant must return exactly what item_at returns.
assert item_at_skip(10**200, 300, 200, 300, Cl) == item_at(10**200, 300, 200, 300, Cl)

# One call of each implementation; the trailing comments are the timings
# reported by the author.
item_at_skip(10**200, 300, 200, 300, Cl) # 3.16ms
item_at(10**200, 300, 200, 300, Cl) # 6.25ms

关于python - 比提供的解决方案更快地获取排列索引和索引处的排列，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/71207863/

文章推荐： c# - 在 C# 中序列化和反序列化自定义异常

文章推荐： python - 容器重启原因 OOMKilled，退出代码为 1

文章推荐： javascript - 解读 Uniswap V3 价格

python - Python 中的集群或合并集群以减少组数 (Python)
我正在处理一组标记为 160 个组的 173k 点。我想通过合并最接近的(到 9 或 10 个组)来减少组/集群的数量。我搜索过 sklearn 或类似的库，但没有成功。我猜它只是通过 knn 聚类
python - python 列表的子集基于同一列表的元素组，pythonically
我有一个扁平数字列表，这些数字逻辑上以 3 为一组，其中每个三元组是 (number, __ignored, flag[0 or 1])，例如: [7,56,1, 8,0,0, 2,0,0, 6,1,
python - 激活 Python 虚拟环境并在另一个 Python 脚本中调用 Python 脚本
我正在使用 pipenv 来管理我的包。我想编写一个 python 脚本来调用另一个使用不同虚拟环境(VE)的 python 脚本。如何运行使用 VE1 的 python 脚本 1 并调用另一个 p
python - 在焕然一新的 Python 环境中以编程方式从 Python 内部执行 Python 文件
假设我有一个文件 script.py 位于 path = "foo/bar/script.py"。我正在寻找一种在 Python 中通过函数 execute_script() 从我的主要 Python
python - 从 python 脚本但在 python 脚本之外运行 python 脚本
这听起来像是谜语或笑话，但实际上我还没有找到这个问题的答案。问题到底是什么？我想运行 2 个脚本。在第一个脚本中，我调用另一个脚本，但我希望它们继续并行，而不是在两个单独的线程中。主要是我不希望第
python - 使用不同的 python 从 python 运行 python 脚本
我有一个带有 python 2.5.5 的软件。我想发送一个命令，该命令将在 python 2.7.5 中启动一个脚本，然后继续执行该脚本。我试过用 #!python2.7.5 和http://re
python - 为什么从 Python 命令行调用 Python 时 Python 无法找到并运行我的脚本？
我在 python 命令行(使用 python 2.7)中，并尝试运行 Python 脚本。我的操作系统是 Windows 7。我已将我的目录设置为包含我所有脚本的文件夹，使用: os.chdir("
python - 使用动态版本的 Python 执行嵌入的 Python 代码时出现致命的 Python 错误
剧透:部分解决(见最后)。以下是使用 Python 嵌入的代码示例: #include int main(int argc, char** argv) { Py_SetPythonHome
python - python 中识别 python 数组或列表中最大累积差异的最快方法是什么？
假设我有以下列表，对应于及时的股票价格: prices = [1, 3, 7, 10, 9, 8, 5, 3, 6, 8, 12, 9, 6, 10, 13, 8, 4, 11] 我想确定以下总体上最
python - (Python) 通过单选按钮 python 更新背景
所以我试图在选择某个单选按钮时更改此框架的背景。我的框架位于一个类中，并且单选按钮的功能位于该类之外。 (这样我就可以在所有其他框架上调用它们。) 问题是每当我选择单选按钮时都会出现以下错误: co
python - python 中的字符串与正则表达式比较在 python 中失败
我正在尝试将字符串与 python 中的正则表达式进行比较，如下所示， #!/usr/bin/env python3 import re str1 = "Expecting property name
python - python 如何加载Boost.Python 库？
考虑以下原型(prototype) Boost.Python 模块，该模块从单独的 C++ 头文件中引入类“D”。 /* file: a/b.cpp */ BOOST_PYTHON_MODULE(c)
python - python 检查模块 python 的问题
如何编写一个程序来“识别函数调用的行号？” python 检查模块提供了定位行号的选项，但是， def di(): return inspect.currentframe().f_back.f_l
python - 系统 python 与用户 python
我已经使用 macports 安装了 Python 2.7，并且由于我的 $PATH 变量，这就是我输入 $ python 时得到的变量。然而，virtualenv 默认使用 Python 2.6，除
python - [Python] : Python re. 长字符串行的搜索速度优化
我只想问如何加快 python 上的 re.search 速度。我有一个很长的字符串行，长度为 176861(即带有一些符号的字母数字字符)，我使用此函数测试了该行以进行研究: def getExe
python - 编辑字符串 python 正则表达式 python
list1= [u'%app%%General%%Council%', u'%people%', u'%people%%Regional%%Council%%Mandate%', u'%ppp%%Ge
python - Python 映射中的副作用(Python "do" block )
这个问题在这里已经有了答案: Is it Pythonic to use list comprehensions for just side effects? (7 个答案) 关闭 4 个月前。告
python - 使用其值逻辑组合两个 python 列表 - Python
我想用 Python 将两个列表组合成一个列表，方法如下: a = [1,1,1,2,2,2,3,3,3,3] b= ["Sun", "is", "bright", "June","and" ,"Ju
python - Boost.Python python 链接错误
我正在运行带有最新 Boost 发行版 (1.55.0) 的 Mac OS X 10.8.4 (Darwin 12.4.0)。我正在按照说明 here构建包含在我的发行版中的教程 Boost-Pyth
python - 在 Python 中仅使用内置库制作一个基本的网络抓取工具 - Python
学习 Python，我正在尝试制作一个没有任何第 3 方库的网络抓取工具，这样过程对我来说并没有简化，而且我知道我在做什么。我浏览了一些在线资源，但所有这些都让我对某些事情感到困惑。 html 看起来

行者123

个人简介

我是一名优秀的程序员,十分优秀！

作者热门文章

滴滴打车优惠券免费领取

全站热门文章

首页

博学

6Ren·AI

商城