gpt4 book ai didi

string - 如何找到给定条件描述的所有字符串?

转载 作者:塔克拉玛干 更新时间:2023-11-03 06:28:57 26 4
gpt4 key购买 nike

问题:

  1. 给定一个整数k。
  2. 有一个由10组成的string(例如'10110'),在此处将其命名为二进制代码
  3. 二进制码的长度是2^k+k-1(例如给定k是2,所以二进制码的长度是5)
  4. 如果二进制码的长度为k的子串在二进制码中只出现一次。

那么二进制代码就是我们要找的。例如:

Given k is 2 (for example, the length of the matched binary code is 5. Code "10011" is a match, because its substring of lenght 2 are "10","00","01","11", and all of them occured in the binary code only once.). all of such binary codes are "00110", "10011", "11001", "01100"

我正在寻找一种算法来尽快找到给定 k 的所有二进制代码

最佳答案

我们可以将二进制代码建模为长度k的所有组合的排列。我们有 2^k 组合,它们可以排列成 (2^k)! 排列。代码比较多,但是满足需求3。长度2^k+k-1表示k-1每个长度组合的后缀k 必须是下一个组合的 k-1 前缀。当每个新组合添加一个新符号时,我们有第一个长度 k 2^k - 1 长度 k 的新组合。所以我们可以非常快地修剪所有这些 (2^k)!

我们可以将长度 k 的组合建模为二进制数。然后每个“代码”可以从这些数字中的一个开始,下一个数字必须从前一个数字开始有 k-1 位,并添加一个新的位 0 或 1。这可以通过向左移动来完成1 并添加 0 或 1,然后屏蔽到 k 位。新号码只有在没有被使用过的情况下才能使用,所以我们必须记住使用过的号码。我们将生成满足上述要求的长度为 2^k 的序列。结果,我们将这些序列转换为相应的二进制代码。这意味着我们使用第一个数字的所有位,并从所有下一个数字中添加最少的位。或者我们可以使用所有数字的最高位并从最后一位开始添加所有位。

Erlang 中的结果代码:

-module(binary_code).

-export([gen/1]).

gen(K) ->
N = (1 bsl K) - 1,
gen(N, K, lists:seq(0, N)).

gen(Mask, K, L) ->
[ [ $0 + B || <<B:1>> <= <<X:K>> ] ++ V
|| X <- L, V <- gen(Mask, Mask, X, [X]) ].

gen(_, 0, _, _) -> [[]];
gen(Mask, N, Prev, Prefix) ->
P = (Prev bsl 1) band Mask,
[ [$0 + (X band 1)|V] || X <- [P, P bor 1],
not lists:member(X, Prefix),
V <- gen(Mask, N-1, X, [X|Prefix])
].

结果:

44> binary_code:gen(1).
["01","10"]
45> binary_code:gen(2).
["00110","01100","10011","11001"]
46> binary_code:gen(3).
["0001011100","0001110100","0010111000","0011101000",
"0100011101","0101110001","0111000101","0111010001",
"1000101110","1000111010","1010001110","1011100010",
"1100010111","1101000111","1110001011","1110100011"]
47> binary_code:gen(4).
["0000100110101111000","0000100111101011000",
"0000101001101111000","0000101001111011000",
"0000101100111101000","0000101101001111000",
"0000101111001101000","0000101111010011000",
"0000110010111101000","0000110100101111000",
"0000110101111001000","0000110111100101000",
"0000111100101101000","0000111101001011000",
"0000111101011001000","0000111101100101000",
"0001001101011110000","0001001111010110000",
"0001010011011110000","0001010011110110000",
"0001011001111010000","0001011010011110000",
"0001011110011010000","0001011110100110000",
"0001100101111010000","0001101001011110000",
"0001101011110010000","0001101111001010000",
[...]|...]
48> length(v(47)).
256
49> binary_code:gen(5).
["000001000110010100111010110111110000",
"000001000110010100111011010111110000",
"000001000110010100111110101101110000",
"000001000110010100111110110101110000",
"000001000110010101101001110111110000",
"000001000110010101101001111101110000",
"000001000110010101101110100111110000",
"000001000110010101101111101001110000",
"000001000110010101110110100111110000",
"000001000110010101111101101001110000",
"000001000110010110101001110111110000",
"000001000110010110101001111101110000",
"000001000110010110111010100111110000",
"000001000110010110111110101001110000",
"000001000110010111011010100111110000",
"000001000110010111110110101001110000",
"000001000110011101001010110111110000",
"000001000110011101010010110111110000",
"000001000110011101101001010111110000",
"000001000110011101101010010111110000",
"000001000110011111010010101101110000",
"000001000110011111010100101101110000",
"000001000110011111011010010101110000",
"000001000110011111011010100101110000",
"000001000110100101011001110111110000",
"000001000110100101011001111101110000",
"000001000110100101011100111110110000",
"000001000110100101011101100111110000",
[...]|...]
50> length(v(49)).
65536

似乎二进制代码的数量是2^(2^(k-1))。它迅速升级。要为更高的k 生成二进制代码,我建议使用C 或ASM。 (对于 k=6,2^32 = 4,294,967,296。)

编辑:

有我的attempt在 C 中实现它并且效果很好。

$ ./binary_code 1
01
10
$ ./binary_code 2
00110
01100
10011
11001
$ ./binary_code 3
0001011100
0001110100
0010111000
0011101000
0100011101
0101110001
0111000101
0111010001
1000101110
1000111010
1010001110
1011100010
1100010111
1101000111
1110001011
1110100011
$ time ./binary_code 4 | wc
256 256 5120

real 0m0.003s
user 0m0.000s
sys 0m0.000s
$ time ./binary_code 5 | wc
65536 65536 2424832

real 0m0.053s
user 0m0.088s
sys 0m0.000s
$ ./binary_code 6 | head -n 20
000000100001100010100011100100101100110100111101010111011011111100000
000000100001100010100011100100101100110100111101101110101011111100000
000000100001100010100011100100101100110100111111010101110110111100000
000000100001100010100011100100101100110100111111011011101010111100000
000000100001100010100011100100101100110101011101001111011011111100000
000000100001100010100011100100101100110101011101001111110110111100000
000000100001100010100011100100101100110101011101101111010011111100000
000000100001100010100011100100101100110101011101101111110100111100000
000000100001100010100011100100101100110101011110110111010011111100000
000000100001100010100011100100101100110101011111101101110100111100000
000000100001100010100011100100101100110110100111101010111011111100000
000000100001100010100011100100101100110110100111101110101011111100000
000000100001100010100011100100101100110110100111111010101110111100000
000000100001100010100011100100101100110110100111111011101010111100000
000000100001100010100011100100101100110110101011101001111011111100000
000000100001100010100011100100101100110110101011101001111110111100000
000000100001100010100011100100101100110110101011101111010011111100000
000000100001100010100011100100101100110110101011101111110100111100000
000000100001100010100011100100101100110110101011110111010011111100000
000000100001100010100011100100101100110110101011111101110100111100000

$ time ./binary_code 6 | wc
4294967296 4294967296 300647710720

real 123m18.854s
user 183m36.848s
sys 2m33.652s

$ time ./binary_code 6 > /dev/null

real 63m5.656s
user 62m50.808s
sys 0m11.072s

对于 k=6,它可以生成 75MB/s 的二进制代码。我也试过解决它without recursion但是 gcc 必须在引擎盖下做一些惊人的事情,因为非递归版本比我的第一个直接递归版本慢大约 10%。

关于string - 如何找到给定条件描述的所有字符串?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/20422126/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com