gpt4 book ai didi

c++ - 高效的字符串截断算法,顺序删除相等的前缀和后缀

转载 作者:行者123 更新时间:2023-12-01 11:59:05 25 4
gpt4 key购买 nike

Time limit per test: 5 seconds
Memory limit per test: 512 megabytes

You are given a string s of length n (n ≤ 5000). You can select any proper prefix of this string that is also its suffix and remove either selected prefix or corresponding suffix. Then you can apply an analogous operation to a resulting string and so on. What is the minimum length of the final string, that can be achieved after applying the optimal sequence of such operations?

Input
The first line of each test contains a string s that consists of small English letters.

Output
Output a single integer — the minimum length of the final string, that can be achieved after applying the optimal sequence of such operations.

Examples
+-------+--------+----------------------------------+
| Input | Output | Explanation |
+-------+--------+----------------------------------+
| caaca | 2 | caaca → ca|aca → aca → ac|a → ac |
+-------+--------+----------------------------------+
| aabaa | 2 | aaba|a → a|aba → ab|a → ab |
+-------+--------+----------------------------------+
| abc | 3 | No operations are possible |
+-------+--------+----------------------------------+



这是我迄今为止设法做的事情:
  • 计算 prefix function对于 中给定字符串的所有子字符串O(n^2)
  • 检查在 中执行所有可能的操作组合的结果O(n^3)

  • 我的解决方案通过了 n 的所有测试≤ 2000 但当 2000 < n 时超过时限≤ 5000。这是它的代码:
    #include <iostream>
    #include <string>

    using namespace std;

    const int MAX_N = 5000;

    int result; // 1 less than actual

    // [x][y] corresponds to substring that starts at position `x` and ends at position `x + y` =>
    // => corresponding substring length is `y + 1`
    int lps[MAX_N][MAX_N]; // prefix function for the substring s[x..x+y]
    bool checked[MAX_N][MAX_N]; // whether substring s[x..x+y] is processed by check function

    // length is 1 less than actual
    void check(int start, int length) {
    checked[start][length] = true;
    if (length < result) {
    if (length == 0) {
    cout << 1; // actual length = length + 1 = 0 + 1 = 1
    exit(0); // 1 is the minimum possible result
    }
    result = length;
    }
    // iteration over all proper prefixes that are also suffixes
    // i - current prefix length
    for (int i = lps[start][length]; i != 0; i = lps[start][i - 1]) {
    int newLength = length - i;
    int newStart = start + i;
    if (!checked[start][newLength])
    check(start, newLength);
    if (!checked[newStart][newLength])
    check(newStart, newLength);
    }
    }

    int main()
    {
    string str;
    cin >> str;
    int n = str.length();
    // lps calculation runs in O(n^2)
    for (int l = 0; l < n; l++) {
    int subLength = n - l;
    lps[l][0] = 0;
    checked[l][0] = false;
    for (int i = 1; i < subLength; ++i) {
    int j = lps[l][i - 1];
    while (j > 0 && str[i + l] != str[j + l])
    j = lps[l][j - 1];
    if (str[i + l] == str[j + l]) j++;
    lps[l][i] = j;
    checked[l][i] = false;
    }
    }
    result = n - 1;
    // checking all possible operations combinations in O(n^3)
    check(0, n - 1);
    cout << result + 1;
    }

    问:有没有更有效的解决方案?

    最佳答案

    这是获取对数因子的一种方法。让 dp[i][j]如果我们可以到达子串 s[i..j],则为真.然后:

    dp[0][length(s)-1] ->
    true

    dp[0][j] ->
    if s[0] != s[j+1]:
    false
    else:
    true if any dp[0][k]
    for j < k ≤ (j + longestMatchRight[0][j+1])

    (The longest match we can use is
    also bound by the current range.)

    (Initialise left side similarly.)

    现在从外到内迭代:
    for i = 1 to length(s)-2:
    for j = length(s)-2 to i:
    dp[i][j] ->
    // We removed on the right
    if s[i] != s[j+1]:
    false
    else:
    true if any dp[i][k]
    for j < k ≤ (j + longestMatchRight[i][j+1])

    // We removed on the left
    if s[i-1] != s[j]:
    true if dp[i][j]
    else:
    true if any dp[k][j]
    for (i - longestMatchLeft[i-1][j]) ≤ k < i

    我们可以预先计算每个起始对的最长匹配 (i, j)O(n^2)随着复发,
    longest(i, j) -> 
    if s[i] == s[j]:
    return 1 + longest(i + 1, j + 1)
    else:
    return 0

    这将允许我们检查从索引 i 开始的子字符串匹配。和 jO(1) . (我们需要左右两个方向。)

    如何获得对数因子

    我们可以想出一种方法来提出一个数据结构,让我们能够确定
    any dp[i][k]
    for j < k ≤ (j + longestMatchRight[i][j+1])

    (And similarly for the left side.)

    O(log n) ,考虑到我们已经看到了这些值。

    这是带有段树的 C++ 代码(用于左右查询,所以 O(n^2 * log n) ),其中包括 Bananon 的测试生成器。对于 5000 个“a”字符,它运行了 3.54 秒,420 MB( https://ideone.com/EIrhnR )。为了减少内存,在单行上实现了一个段树(我仍然需要调查对左侧查询做同样的事情以进一步减少内存。)
    #include <iostream>
    #include <string>
    #include <ctime>
    #include <random>
    #include <algorithm> // std::min

    using namespace std;

    const int MAX_N = 5000;

    int seg[2 * MAX_N];
    int segsL[MAX_N][2 * MAX_N];
    int m[MAX_N][MAX_N][2];
    int dp[MAX_N][MAX_N];
    int best;

    // Adapted from https://codeforces.com/blog/entry/18051
    void update(int n, int p, int value) { // set value at position p
    for (seg[p += n] = value; p > 1; p >>= 1)
    seg[p >> 1] = seg[p] + seg[p ^ 1];
    }
    // Adapted from https://codeforces.com/blog/entry/18051
    int query(int n, int l, int r) { // sum on interval [l, r)
    int res = 0;
    for (l += n, r += n; l < r; l >>= 1, r >>= 1) {
    if (l & 1) res += seg[l++];
    if (r & 1) res += seg[--r];
    }
    return res;
    }
    // Adapted from https://codeforces.com/blog/entry/18051
    void updateL(int n, int i, int p, int value) { // set value at position p
    for (segsL[i][p += n] = value; p > 1; p >>= 1)
    segsL[i][p >> 1] = segsL[i][p] + segsL[i][p ^ 1];
    }
    // Adapted from https://codeforces.com/blog/entry/18051
    int queryL(int n, int i, int l, int r) { // sum on interval [l, r)
    int res = 0;
    for (l += n, r += n; l < r; l >>= 1, r >>= 1) {
    if (l & 1) res += segsL[i][l++];
    if (r & 1) res += segsL[i][--r];
    }
    return res;
    }

    // Code by גלעד ברקן
    void precalc(int n, string & s) {
    int i, j;
    for (i = 0; i < n; i++) {
    for (j = 0; j < n; j++) {
    // [longest match left, longest match right]
    m[i][j][0] = (s[i] == s[j]) & 1;
    m[i][j][1] = (s[i] == s[j]) & 1;
    }
    }

    for (i = n - 2; i >= 0; i--)
    for (j = n - 2; j >= 0; j--)
    m[i][j][1] = s[i] == s[j] ? 1 + m[i + 1][j + 1][1] : 0;

    for (i = 1; i < n; i++)
    for (j = 1; j < n; j++)
    m[i][j][0] = s[i] == s[j] ? 1 + m[i - 1][j - 1][0] : 0;
    }

    // Code by גלעד ברקן
    void f(int n, string & s) {
    int i, j, k, longest;

    dp[0][n - 1] = 1;
    update(n, n - 1, 1);
    updateL(n, n - 1, 0, 1);

    // Right side initialisation
    for (j = n - 2; j >= 0; j--) {
    if (s[0] == s[j + 1]) {
    longest = std::min(j + 1, m[0][j + 1][1]);
    for (k = j + 1; k <= j + longest; k++)
    dp[0][j] |= dp[0][k];
    if (dp[0][j]) {
    update(n, j, 1);
    updateL(n, j, 0, 1);
    best = std::min(best, j + 1);
    }
    }
    }

    // Left side initialisation
    for (i = 1; i < n; i++) {
    if (s[i - 1] == s[n - 1]) {
    // We are bound by the current range
    longest = std::min(n - i, m[i - 1][n - 1][0]);
    for (k = i - 1; k >= i - longest; k--)
    dp[i][n - 1] |= dp[k][n - 1];
    if (dp[i][n - 1]) {
    updateL(n, n - 1, i, 1);
    best = std::min(best, n - i);
    }
    }
    }

    for (i = 1; i <= n - 2; i++) {
    for (int ii = 0; ii < MAX_N; ii++) {
    seg[ii * 2] = 0;
    seg[ii * 2 + 1] = 0;
    }
    update(n, n - 1, dp[i][n - 1]);
    for (j = n - 2; j >= i; j--) {
    // We removed on the right
    if (s[i] == s[j + 1]) {
    // We are bound by half the current range
    longest = std::min(j - i + 1, m[i][j + 1][1]);
    //for (k=j+1; k<=j+longest; k++)
    //dp[i][j] |= dp[i][k];
    if (query(n, j + 1, j + longest + 1)) {
    dp[i][j] = 1;
    update(n, j, 1);
    updateL(n, j, i, 1);
    }
    }
    // We removed on the left
    if (s[i - 1] == s[j]) {
    // We are bound by half the current range
    longest = std::min(j - i + 1, m[i - 1][j][0]);
    //for (k=i-1; k>=i-longest; k--)
    //dp[i][j] |= dp[k][j];
    if (queryL(n, j, i - longest, i)) {
    dp[i][j] = 1;
    updateL(n, j, i, 1);
    update(n, j, 1);
    }
    }
    if (dp[i][j])
    best = std::min(best, j - i + 1);
    }
    }
    }

    int so(string s) {
    for (int i = 0; i < MAX_N; i++) {
    seg[i * 2] = 0;
    seg[i * 2 + 1] = 0;
    for (int j = 0; j < MAX_N; j++) {
    segsL[i][j * 2] = 0;
    segsL[i][j * 2 + 1] = 0;
    m[i][j][0] = 0;
    m[i][j][1] = 0;
    dp[i][j] = 0;
    }
    }
    int n = s.length();
    best = n;
    precalc(n, s);
    f(n, s);
    return best;
    }
    // End code by גלעד ברקן

    // Code by Bananon =======================================================================

    int result;

    int lps[MAX_N][MAX_N];
    bool checked[MAX_N][MAX_N];

    void check(int start, int length) {
    checked[start][length] = true;
    if (length < result) {
    result = length;
    }
    for (int i = lps[start][length]; i != 0; i = lps[start][i - 1]) {
    int newLength = length - i;
    if (!checked[start][newLength])
    check(start, newLength);
    int newStart = start + i;
    if (!checked[newStart][newLength])
    check(newStart, newLength);
    }
    }

    int my(string str) {
    int n = str.length();
    for (int l = 0; l < n; l++) {
    int subLength = n - l;
    lps[l][0] = 0;
    checked[l][0] = false;
    for (int i = 1; i < subLength; ++i) {
    int j = lps[l][i - 1];
    while (j > 0 && str[i + l] != str[j + l])
    j = lps[l][j - 1];
    if (str[i + l] == str[j + l]) j++;
    lps[l][i] = j;
    checked[l][i] = false;
    }
    }
    result = n - 1;
    check(0, n - 1);
    return result + 1;
    }

    // generate =================================================================

    bool rndBool() {
    return rand() % 2 == 0;
    }

    int rnd(int bound) {
    return rand() % bound;
    }

    void untrim(string & str) {
    int length = rnd(str.length());
    int prefixLength = rnd(str.length()) + 1;
    if (rndBool())
    str.append(str.substr(0, prefixLength));
    else {
    string newStr = str.substr(str.length() - prefixLength, prefixLength);
    newStr.append(str);
    str = newStr;
    }
    }

    void rndTest(int minTestLength, string s) {
    while (s.length() < minTestLength)
    untrim(s);
    int myAns = my(s);
    int soAns = so(s);
    cout << myAns << " " << soAns << '\n';
    if (soAns != myAns) {
    cout << s;
    exit(0);
    }
    }

    int main() {
    int minTestLength;
    cin >> minTestLength;
    string seed;
    cin >> seed;
    while (true)
    rndTest(minTestLength, seed);
    }

    这里是 JavaScript 代码(没有对数因子的改进)来表明重复是有效的。 (为了获得对数因子,我们用单个范围查询替换了内部 k 循环。)

    debug = 1

    function precalc(s){
    let m = new Array(s.length)
    for (let i=0; i<s.length; i++){
    m[i] = new Array(s.length)
    for (let j=0; j<s.length; j++){
    // [longest match left, longest match right]
    m[i][j] = [(s[i] == s[j]) & 1, (s[i] == s[j]) & 1]
    }
    }

    for (let i=s.length-2; i>=0; i--)
    for (let j=s.length-2; j>=0; j--)
    m[i][j][1] = s[i] == s[j] ? 1 + m[i+1][j+1][1] : 0

    for (let i=1; i<s.length; i++)
    for (let j=1; j<s.length; j++)
    m[i][j][0] = s[i] == s[j] ? 1 + m[i-1][j-1][0] : 0

    return m
    }

    function f(s){
    m = precalc(s)
    let n = s.length
    let min = s.length
    let dp = new Array(s.length)

    for (let i=0; i<s.length; i++)
    dp[i] = new Array(s.length).fill(0)

    dp[0][s.length-1] = 1

    // Right side initialisation
    for (let j=s.length-2; j>=0; j--){
    if (s[0] == s[j+1]){
    let longest = Math.min(j + 1, m[0][j+1][1])
    for (let k=j+1; k<=j+longest; k++)
    dp[0][j] |= dp[0][k]
    if (dp[0][j])
    min = Math.min(min, j + 1)
    }
    }

    // Left side initialisation
    for (let i=1; i<s.length; i++){
    if (s[i-1] == s[s.length-1]){
    let longest = Math.min(s.length - i, m[i-1][s.length-1][0])
    for (let k=i-1; k>=i-longest; k--)
    dp[i][s.length-1] |= dp[k][s.length-1]
    if (dp[i][s.length-1])
    min = Math.min(min, s.length - i)
    }
    }

    for (let i=1; i<=s.length-2; i++){
    for (let j=s.length-2; j>=i; j--){
    // We removed on the right
    if (s[i] == s[j+1]){
    // We are bound by half the current range
    let longest = Math.min(j - i + 1, m[i][j+1][1])
    for (let k=j+1; k<=j+longest; k++)
    dp[i][j] |= dp[i][k]
    }
    // We removed on the left
    if (s[i-1] == s[j]){
    // We are bound by half the current range
    let longest = Math.min(j - i + 1, m[i-1][j][0])
    for (let k=i-1; k>=i-longest; k--)
    dp[i][j] |= dp[k][j]
    }
    if (dp[i][j])
    min = Math.min(min, j - i + 1)
    }
    }

    if (debug){
    let str = ""
    for (let row of dp)
    str += row + "\n"
    console.log(str)
    }

    return min
    }

    function main(s){
    var strs = [
    "caaca",
    "bbabbbba",
    "baabbabaa",
    "bbabbba",
    "bbbabbbbba",
    "abbabaabbab",
    "abbabaabbaba",
    "aabaabaaabaab",
    "bbabbabbb"
    ]

    for (let s of strs){
    let t = new Date
    console.log(s)
    console.log(f(s))
    //console.log((new Date - t)/1000)
    console.log("")
    }
    }

    main()

    关于c++ - 高效的字符串截断算法,顺序删除相等的前缀和后缀,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/59687292/

    25 4 0
    Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
    广告合作:1813099741@qq.com 6ren.com