gpt4 book ai didi

c# - 找到所有 k 大小的子集,其中包含 n 大小的重复未排序正整数袋的总和 s

转载 作者:塔克拉玛干 更新时间:2023-11-03 03:00:03 24 4
gpt4 key购买 nike

请注意,这是 C# .NET 2.0 项目所必需的(不允许使用 Linq)。

我知道这里已经提出了非常相似的问题,并且我已经生成了一些工作代码(见下文),但仍然希望获得关于如何在给定 k 和 s 条件下使算法更快的建议。

这是我到目前为止学到的:动态规划是找到一个(不是所有)子集的最有效方法。如果我错了,请纠正我。有没有一种方法可以重复调用 DP 代码来生成更新的子集,直到袋子(设置重复项)用完?

如果没有,那么有没有一种方法可以加速我下面的回溯递归算法,它确实产生了我需要的东西,但在 O(2^n) 中运行,我认为,通过考虑 s 和 k?

这是我固定的数字包,永远不会随着 n=114 和数字范围从 3 到 286 改变:

    int[] numbers = new int[]
{
7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
5, 4, 5, 6
};

要求

  • 最大 2-3GB 的空间限制,但时间应该是 O(n^something) 而不是(某物^n)。

  • 不得对行李进行分类,不得移除重复的行李。

  • 结果应该是匹配中数字的索引子集,而不是数字本身(因为我们有重复项)。

动态编程尝试

这是根据对 stackoverflow.com 上类似问题的回答改编的 C# 动态编程版本:

using System;
using System.Collections.Generic;

namespace Utilities
{
public static class Combinations
{
private static Dictionary<int, bool> m_memo = new Dictionary<int, bool>();
private static Dictionary<int, KeyValuePair<int, int>> m_previous = new Dictionary<int, KeyValuePair<int, int>>();
static Combinations()
{
m_memo.Clear();
m_previous.Clear();
m_memo[0] = true;
m_previous[0] = new KeyValuePair<int, int>(-1, 0);

}

public static bool FindSubset(IList<int> set, int sum)
{
//m_memo.Clear();
//m_previous.Clear();
//m_memo[0] = true;
//m_previous[0] = new KeyValuePair<int, int>(-1, 0);

for (int i = 0; i < set.Count; ++i)
{
int num = set[i];
for (int s = sum; s >= num; --s)
{
if (m_memo.ContainsKey(s - num) && m_memo[s - num] == true)
{
m_memo[s] = true;

if (!m_previous.ContainsKey(s))
{
m_previous[s] = new KeyValuePair<int, int>(i, num);
}
}
}
}

return m_memo.ContainsKey(sum) && m_memo[sum];
}
public static IEnumerable<int> GetLastIndex(int sum)
{
while (m_previous[sum].Key != -1)
{
yield return m_previous[sum].Key;
sum -= m_previous[sum].Value;
}
}

public static void SubsetSumMain(string[] args)
{
int[] numbers = new int[]
{
7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
5, 4, 5, 6
};

int sum = 400;
//int size = 4; // don't know to use in dynamic programming

// call dynamic programming
if (Numbers.FindSubset(numbers, sum))
{
foreach (int index in Numbers.GetLastIndex(sum))
{
Console.Write((index + 1) + "." + numbers[index] + "\t");
}
Console.WriteLine();
}
Console.WriteLine();

Console.ReadKey();
}
}
}

递归编程尝试

这是根据 stackoverflow.com 上类似问题的答案改编的 C# 递归编程版本:

using System;
using System.Collections.Generic;

namespace Utilities
{
public static class Combinations
{
private static int s_count = 0;
public static int CountSubsets(int[] numbers, int index, int current, int sum, int size, List<int> result)
{
if ((numbers.Length <= index) || (current > sum)) return 0;
if (result == null) result = new List<int>();

List<int> temp = new List<int>(result);
if (current + numbers[index] == sum)
{
temp.Add(index);
if ((size == 0) || (temp.Count == size))
{
s_count++;
}
}
else if (current + numbers[index] < sum)
{
temp.Add(index);
CountSubsets(numbers, index + 1, current + numbers[index], sum, size, temp);
}

CountSubsets(numbers, index + 1, current, sum, size, result);
return s_count;
}

private static List<List<int>> m_subsets = new List<List<int>>();
public static List<List<int>> FindSubsets(int[] numbers, int index, int current, int sum, int size, List<int> result)
{
if ((numbers.Length <= index) || (current > sum)) return m_subsets;
if (result == null) result = new List<int>();

List<int> temp = new List<int>(result);
if (current + numbers[index] == sum)
{
temp.Add(index);
if ((size == 0) || (temp.Count == size))
{
m_subsets.Add(temp);
}
}
else if (current + numbers[index] < sum)
{
temp.Add(index);
FindSubsets(numbers, index + 1, current + numbers[index], sum, size, temp);
}

FindSubsets(numbers, index + 1, current, sum, size, result);

return m_subsets;
}

public static void SubsetSumMain(string[] args)
{
int[] numbers = new int[]
{
7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
5, 4, 5, 6
};

int sum = 17;
int size = 2;

// call backtracking recursive programming
Console.WriteLine("CountSubsets");
int count = Numbers.CountSubsets(numbers, 0, 0, sum, size, null);
Console.WriteLine("Count = " + count);
Console.WriteLine();

// call backtracking recursive programming
Console.WriteLine("FindSubsets");
List<List<int>> subsets = Numbers.FindSubsets(numbers, 0, 0, sum, size, null);
for (int i = 0; i < subsets.Count; i++)
{
if (subsets[i] != null)
{
Console.Write((i + 1).ToString() + ":\t");
for (int j = 0; j < subsets[i].Count; j++)
{
int index = subsets[i][j];
Console.Write((index + 1) + "." + numbers[index] + " ");
}
Console.WriteLine();
}
}
Console.WriteLine("Count = " + subsets.Count);

Console.ReadKey();
}
}
}

请告诉我如何将动态规划版本限制为大小为 k 的子集,以及我是否可以重复调用它,以便它在每次调用时返回不同的子集,直到没有更多匹配的子集为止。

我也不知道在哪里初始化DP算法的备忘录。我在访问任何方法时自动运行的静态构造函数中完成了它。这是正确的初始化位置还是需要将其移动到 FindSunset() 方法内部 [注释掉]?

至于递归版本,是回溯吗?我们怎样才能加快速度。它工作正常并考虑了 k 和 s,但效率很低。

让我们将此线程作为所有 C# SubsetSum 相关问题的源头!

最佳答案

我之前的回答基于切断要检查的组合数量的原则。但是一旦你对数组进行排序,这就可以得到显着改善。原理类似,但是解决方案完全不同,所以我决定放在一个单独的答案中。

我小心翼翼地只使用 .Net Framework 2.0 功能。稍后可能会添加直观的解释,但评论应该足够了。

class Puzzle
{
private readonly int[] _tailSums;
public readonly SubsetElement[] Elements;
public readonly int N;

public Puzzle(int[] numbers)
{
// Set N and make Elements array
// (to remember the original index of each element)
this.N = numbers.Length;
this.Elements = new SubsetElement[this.N];

for (var i = 0; i < this.N; i++)
{
this.Elements[i] = new SubsetElement(numbers[i], i);
}

// Sort Elements descendingly by their Number value
Array.Sort(this.Elements, (a, b) => b.Number.CompareTo(a.Number));

// Save tail-sums to allow immediate access by index
// Allow immedate calculation by index = N, to sum 0
this._tailSums = new int[this.N + 1];
var sum = 0;

for (var i = this.N - 1; i >= 0; i--)
{
this._tailSums[i] = sum += this.Elements[i].Number;
}
}

public void Solve(int s, Action<SubsetElement[]> callback)
{
for (var k = 1; k <= this.N; k++)
this.Solve(k, s, callback);
}

public void Solve(int k, int s, Action<SubsetElement[]> callback)
{
this.ScanSubsets(0, k, s, new List<SubsetElement>(), callback);
}

private void ScanSubsets(int startIndex, int k, int s,
List<SubsetElement> subset, Action<SubsetElement[]> cb)
{
// No more numbers to add, and current subset is guranteed to be valid
if (k == 0)
{
// Callback with current subset
cb(subset.ToArray());
return;
}

// Sum the smallest k elements
var minSubsetStartIndex = this.N - k;
var minSum = this._tailSums[minSubssetStartIndex];

// Smallest possible sum is greater than wanted sum,
// so a valid subset cannot be found
if (minSum > s)
{
return;
}

// Find largest number that satisfies the condition
// that a valid subset can be found
minSum -= this.Elements[minSubsetStartIndex].Number;

// But remember the last index that satisfies the condition
var minSubsetEndIndex = minSubsetStartIndex;

while (minSubsetStartIndex > startIndex &&
minSum + this.Elements[minSubsetStartIndex - 1].Number <= s)
{
minSubsetStartIndex--;
}

// Find the first number in the sorted sequence that is
// the largest number we just found (in case of duplicates)
while (minSubsetStartIndex > startIndex &&
Elements[minSubsetStartIndex] == Elements[minSubsetStartIndex - 1])
{
minSubsetStartIndex--;
}

// [minSubsetStartIndex .. maxSubsetEndIndex] is the
// full range we must check in recursion

for (var subsetStartIndex = minSubsetStartIndex;
subsetStartIndex <= minSubsetEndIndex;
subsetStartIndex++)
{
// Find the largest possible sum, which is the sum of the
// k first elements, starting at current subsetStartIndex
var maxSum = this._tailSums[subsetStartIndex] -
this._tailSums[subsetStartIndex + k];

// The largest possible sum is less than the wanted sum,
// so a valid subset cannot be found
if (maxSum < s)
{
return;
}

// Add current number to the subset
var x = this.Elements[subsetStartIndex];
subset.Add(x);

// Recurse through the sub-problem to the right
this.ScanSubsets(subsetStartIndex + 1, k - 1, s - x.Number, subset, cb);

// Remove current number and continue loop
subset.RemoveAt(subset.Count - 1);
}
}

public sealed class SubsetElement
{
public readonly int Number;
public readonly int Index;

public SubsetElement(int number, int index)
{
this.Number = number;
this.Index = index;
}

public override string ToString()
{
return string.Format("{0}({1})", this.Number, this.Index);
}
}
}

使用和性能测试:

private static void Main()
{
var sw = Stopwatch.StartNew();
var puzzle = new Puzzle(new[]
{
7, 286, 200, 176, 120, 165, 206, 75, 129, 109,
123, 111, 43, 52, 99, 128, 111, 110, 98, 135,
112, 78, 118, 64, 77, 227, 93, 88, 69, 60,
34, 30, 73, 54, 45, 83, 182, 88, 75, 85,
54, 53, 89, 59, 37, 35, 38, 29, 18, 45,
60, 49, 62, 55, 78, 96, 29, 22, 24, 13,
14, 11, 11, 18, 12, 12, 30, 52, 52, 44,
28, 28, 20, 56, 40, 31, 50, 40, 46, 42,
29, 19, 36, 25, 22, 17, 19, 26, 30, 20,
15, 21, 11, 8, 8, 19, 5, 8, 8, 11,
11, 8, 3, 9, 5, 4, 7, 3, 6, 3,
5, 4, 5, 6
});

puzzle.Solve(2, 17, PuzzleOnSubsetFound);

sw.Stop();
Console.WriteLine("Subsets found: " + _subsetsCount);
Console.WriteLine(sw.Elapsed);
}

private static int _subsetsCount;

private static void PuzzleOnSubsetFound(Puzzle.SubsetElement[] subset)
{
_subsetsCount++;
return; // Skip prints when speed-testing

foreach (var el in subset)
{
Console.Write(el.ToString());
Console.Write(" ");
}

Console.WriteLine();
}

输出:

每一行都是一个找到的子集,()中的数字是子集中使用的数字的原始索引

14(60) 3(107)
14(60) 3(109)
14(60) 3(102)
13(59) 4(105)
13(59) 4(111)
12(64) 5(96)
12(64) 5(104)
12(64) 5(112)
12(64) 5(110)
12(65) 5(96)
12(65) 5(104)
12(65) 5(112)
12(65) 5(110)
11(100) 6(108)
11(100) 6(113)
11(61) 6(108)
11(61) 6(113)
11(92) 6(108)
11(92) 6(113)
11(62) 6(108)
11(62) 6(113)
11(99) 6(108)
11(99) 6(113)
9(103) 8(93)
9(103) 8(94)
9(103) 8(97)
9(103) 8(98)
9(103) 8(101)
Subsets found: 28
00:00:00.0017020 (measured when no printing is performed)


k 越高,可以进行的截断次数越多。这时您会看到主要的性能差异。当 s 变高时,您当前的代码(递归版本)的执行速度也会明显变慢。

With k=5, s=60
Your current code: found 45070 subsets in 2.8602923 seconds
My code: found 45070 subsets in 0.0116727 seconds
That is 99.6% speed improvement

关于c# - 找到所有 k 大小的子集,其中包含 n 大小的重复未排序正整数袋的总和 s,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/30006497/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com