gpt4 book ai didi

c++ - 手动编码的快速排序在较小的整数上较慢

转载 作者:塔克拉玛干 更新时间:2023-11-02 23:31:13 24 4
gpt4 key购买 nike

当比较我的编译器上的快速排序实现与 std::sort 以及合并排序的实现时,我注意到大型数据集上的一个奇怪模式:当对 64 位整数进行操作时,快速排序始终比合并排序快;然而,在较小的 int 大小上,快速排序变得更慢,而合并排序变得更快。

测试代码如下:

#include <iostream>
#include <vector>
#include <iterator>
#include <algorithm>
#include <utility>
#include <random>
#include <chrono>
#include <limits>
#include <functional>

#include <cstdint>


template <typename Iterator>
void insertion_sort(Iterator first, Iterator last)
{
using namespace std;

Iterator head = first;
Iterator new_position;

while(head != last)
{
new_position = head;
while(new_position != first && *new_position < *prev(new_position))
{
swap(*new_position, *prev(new_position));
--new_position;
}
++head;
}
}

template <typename Iterator>
void recursive_mergesort_impl(Iterator first, Iterator last, std::vector<typename Iterator::value_type>& temp)
{
if(last - first > 32)
{
auto middle = first + (last-first)/2;
recursive_mergesort_impl(first, middle, temp);
recursive_mergesort_impl(middle, last, temp);
auto last_merged = merge_move(first, middle, middle, last, temp.begin());
std::move(temp.begin(), last_merged, first);
}
else
{
insertion_sort(first, last);
}
}

template <typename Iterator>
void recursive_mergesort(Iterator first, Iterator last)
{
std::vector<typename Iterator::value_type> temp(last-first);
recursive_mergesort_impl(first, last, temp);
}

// Pick a pivot and move it to front of range
template <typename Iterator>
template <typename Iterator>
void quicksort_pivot_back(Iterator first, Iterator last)
{
using namespace std;

auto middle = first + (last-first)/2;
auto last_elem = prev(last);
Iterator pivot;

if(*first < *middle)
{
if(*middle < *last_elem)
pivot = middle;
else if(*first < *last_elem)
pivot = last_elem;
else
pivot = first;
}
else if(*first < *last_elem)
pivot = first;
else if(*middle < *last_elem)
pivot = last_elem;
else
pivot = middle;

swap(*last_elem, *pivot);
}

template <typename Iterator, typename Function>
std::pair<Iterator, Iterator> quicksort_partition(Iterator first, Iterator last, Function pivot_select)
{
using namespace std;

pivot_select(first, last);

auto pivot = prev(last);
auto bottom = first;
auto top = pivot;

while(bottom != top)
{
if(*bottom < *pivot) ++bottom;
else swap(*bottom, *--top);
}

swap(*pivot, *top++);

return make_pair(bottom, top);
}

template <typename Iterator>
void quicksort_loop(Iterator first, Iterator last)
{
using namespace std;

while(last - first > 32)
{
auto bounds = quicksort_partition(first, last, quicksort_pivot_back<Iterator>);

quicksort_loop(bounds.second, last);
last = bounds.first;
}
}


template <typename Iterator>
void quicksort(Iterator first, Iterator last)
{
quicksort_loop(first, last);
insertion_sort(first, last);
}

template <typename IntType = uint64_t, typename Duration = std::chrono::microseconds, typename Timer = std::chrono::high_resolution_clock, typename Function, typename Generator>
void run_trial(Function sort_func, Generator gen, std::string name, std::size_t trial_size, std::size_t trial_count)
{
using namespace std;
using namespace chrono;

vector<IntType> data(trial_size);

Duration elapsed(0);

cout << "Sorting with " << name << endl;

for(unsigned int i = 0; i < trial_count; ++i)
{
generate(data.begin(), data.end(), gen);

auto start = Timer::now();
sort_func(data.begin(), data.end());
auto finish = Timer::now();

elapsed += duration_cast<Duration>(finish-start);
}

cout << "Done. Average elapsed time: " << elapsed.count() / trial_count << endl;
cout << "Is correct: " << is_sorted(data.begin(), data.end()) << endl << endl;
}

int main()
{
using namespace std;
using namespace chrono;

using int_type = uint64_t;
const size_t trial_size = 12800000;
const int trial_count = 15;

vector<int_type> data(trial_size);
uniform_int_distribution<int_type> distr;
mt19937_64 rnd;

run_trial<int_type>(recursive_mergesort<vector<int_type>::iterator>, bind(distr, rnd), "recursive mergesort", trial_size, trial_count);
run_trial<int_type>(quicksort<vector<int_type>::iterator>, bind(distr, rnd), "quicksort", trial_size, trial_count);
run_trial<int_type>(sort<vector<int_type>::iterator>, bind(distr, rnd), "std::sort", trial_size, trial_count);
}

以下是 12800000 个元素的 15 次试验的时间:

uint64_t:

Sorting with recursive mergesort
Done. Average elapsed time: 1725431
Is correct: 1

Sorting with quicksort
Done. Average elapsed time: 1238070
Is correct: 1

Sorting with std::sort
Done. Average elapsed time: 1131464
Is correct: 1

uint16_t:

Sorting with recursive mergesort
Done. Average elapsed time: 1186467
Is correct: 1

Sorting with quicksort
Done. Average elapsed time: 2368535
Is correct: 1

Sorting with std::sort
Done. Average elapsed time: 888517
Is correct: 1

我感觉这个问题与未对齐的内存访问有关,但这仍然让我想知道为什么其他算法会加速而快速排序会变慢。

最佳答案

使用 uint16_t,您将在如此大的数组中得到很多重复项:按照预期,0 到 65535 中的每一个出现 195 次。没有 three-way ("fat") partition ,或者至少返回其正在处理的子数组中重复出现的枢轴值的中间,这会导致快速排序变为二次排序。 (尝试在仅包含零的数组上用铅笔和纸执行朴素的快速排序以查看效果。)

关于c++ - 手动编码的快速排序在较小的整数上较慢,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23501734/

24 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com