gpt4 book ai didi

c++ - 表达式模板 : unroll loop

转载 作者:行者123 更新时间:2023-11-28 01:43:05 26 4
gpt4 key购买 nike

我有同样的问题: Expression templates: improving performance in evaluating expressions?

我的目标是展开这个表达式的循环

// Pseudo-code sketch of the goal; not compilable as written (statement
// terminators are omitted). A..E are presumably vectors -- TODO confirm.
auto && intermediate = A+D*C
// Each pass is meant to extend the expression held by `intermediate`.
for(int i= 0; i<10 ;i++)
intermediate = intermediate + B
// Evaluation into a concrete Vector is meant to happen only here.
Vector result = intermediate * E

我希望 intermediate 能保存整棵二元表达式树,最后在 Vector 类的 operator=(Expression) 中对这个表达式图进行检查(graph inspection)。我目前的代码只在没有循环的情况下才能工作(我使用的是表达式模板的经典实现,即 Joel Falcou 在 CppCon 2015 上介绍的那种)。

编辑:由于循环导致的代码编译问题

如果取消 main 函数中循环的注释,就会出现编译错误。代码需要按 C++11 标准编译:

g++ -std=c++11 -O3 -fopenmp -Wall -pedantic -pthread main.cpp && ./a.out

#include <cstddef>
#include <functional>
#include <iostream>
#include <utility>
#include <vector>

/// CRTP base shared by every node of the expression tree.
///
/// `TBase` is the element type produced by indexing. `Derived` is the concrete
/// node type; it must provide its own `operator[](size_t) const` and
/// `size() const`, because the members below simply forward to it.
template <typename TBase, typename Derived>
struct BaseExpression
{
    /// Read-only view of this object as its concrete node type.
    Derived const& self() const
    {
        return static_cast<Derived const&>(*this);
    }

    /// Mutable view of this object as its concrete node type.
    Derived & self()
    {
        return static_cast<Derived&>(*this);
    }

    /// Element `szIdx`, as computed by the concrete node.
    TBase operator[](size_t szIdx) const
    {
        Derived const& node = self();
        return node[szIdx];
    }

    /// Number of elements, as reported by the concrete node.
    size_t size() const
    {
        Derived const& node = self();
        return node.size();
    }
};

/// Lazy node that yields `Operator()(a[i], b[i])` for two sub-expressions.
///
/// Nothing is computed at construction time. Both operands are held by const
/// reference rather than copied, so they must outlive every use of this node.
template <typename TBase, typename Operator, typename OP1, typename OP2>
class Binary_Expression : public BaseExpression<TBase, Binary_Expression<TBase, Operator, OP1, OP2> >
{
public:
    /// Binds the node to its left operand `a` and right operand `b`.
    Binary_Expression(OP1 const & a, OP2 const & b)
        : m_lhs(a), m_rhs(b)
    {
    }

    /// Applies the operator to element `idx` of each operand.
    TBase operator[] (size_t idx) const
    {
        return m_op(m_lhs[idx], m_rhs[idx]);
    }

    /// Size of the left operand, or of the right operand when the left one
    /// reports zero.
    size_t size() const
    {
        if (m_lhs.size() != 0)
        {
            return m_lhs.size();
        }
        return m_rhs.size();
    }

private:
    const OP1 & m_lhs;  // left operand, not owned
    const OP2 & m_rhs;  // right operand, not owned
    Operator m_op;      // default-initialised functor applied per element
};


/// Owning array of `TBase` that also acts as a leaf node of the expression tree.
///
/// Constructing a Vector from any `BaseExpression` evaluates that expression
/// element by element into this vector's own storage.
template <typename TBase >
class Vector : public BaseExpression<TBase, Vector<TBase> >
{

public:
    /// Creates a vector holding `szSizeN` value-initialised elements.
    explicit Vector(size_t szSizeN) : m_xMemory(szSizeN){}

    // Copy and move construction are member-wise on the underlying std::vector.
    // The move operations are declared explicitly because the user-provided
    // copy assignment below would otherwise suppress them, turning every
    // "move" of a Vector into a full copy.
    Vector(const Vector &orig) = default;
    Vector(Vector &&orig) = default;

    /// Copy assignment via copy-and-swap: `*this` is left untouched if the
    /// copy throws. Self-assignment is a no-op.
    Vector & operator=(const Vector &orig)
    {
        if (&orig != this)
        {
            Vector temp(orig);
            this->swap(temp);
        }

        return *this;
    }

    /// Move assignment: takes over the storage of `orig`.
    Vector & operator=(Vector &&orig) = default;

    /// Sets every element to `factor`; the size is unchanged.
    Vector & operator=(TBase factor)
    {
        size_t szSizeN = size();
        // Elements are written independently, so the loop may run in parallel
        // when the file is built with OpenMP enabled.
#pragma omp parallel for
        for (size_t idx = 0; idx < szSizeN; idx++)
        {
            m_xMemory[idx] = factor;
        }

        return *this;
    }

    /// Evaluates expression `b` into a new vector of `b.size()` elements.
    ///
    /// Deliberately not `explicit`: `Vector<T> r = a + b;` relies on this
    /// implicit conversion from an expression node.
    template <typename Expression>
    Vector(const BaseExpression<TBase, Expression> &b) :m_xMemory(b.size())
    {
        size_t szSizeN = size();
        // This single loop is where the whole expression tree gets evaluated.
#pragma omp parallel for
        for (size_t idx = 0; idx < szSizeN; idx++)
        {
            m_xMemory[idx] = b[idx];
        }

    }

    /// Exchanges the storage of the two vectors.
    void swap(Vector &orig) noexcept
    {
        using std::swap;
        swap(m_xMemory, orig.m_xMemory);
    }

    /// Element `idx` by value (no bounds check).
    TBase operator[] (size_t idx) const { return m_xMemory[idx]; }

    /// Mutable reference to element `idx` (no bounds check).
    TBase & operator[] (size_t idx) { return m_xMemory[idx]; }

    /// Number of stored elements.
    size_t size() const { return m_xMemory.size(); }

    /// Writes one "Index=<i>\tValue=<v>" line per element to std::cout.
    void print() const
    {
        size_t szSizeN = size();
        for (size_t idx = 0; idx < szSizeN; idx++)
        {
            std::cout << "Index=" << idx << "\t" << "Value=" << m_xMemory[idx] << std::endl;

        }
    }

private:
    std::vector<TBase> m_xMemory;  // element storage
};


/// Builds the lazy element-wise sum of two expressions.
///
/// No arithmetic happens here: the returned node is constructed from the
/// concrete operands (obtained via `self()`) and adds them only when indexed.
template <typename TBase, typename E1, typename E2>
Binary_Expression<TBase, std::plus<TBase>, E1, E2> operator+(const BaseExpression<TBase, E1> & xE1, const BaseExpression< TBase, E2> & xE2)
{
    typedef Binary_Expression<TBase, std::plus<TBase>, E1, E2> SumNode;

    const E1 & lhs = xE1.self();
    const E2 & rhs = xE2.self();
    return SumNode(lhs, rhs);
}


/// Demo driver: fills three vectors with constants, builds the lazy sum
/// x1 + x2, adds x2 once more and prints the evaluated result.
int main()
{
    const size_t kLength = 10;

    Vector<double> x1(kLength);
    x1 = 7.5;

    Vector<double> x2(kLength);
    x2 = 8.;

    Vector<double> x3(kLength);  // only referenced by the disabled loop below
    x3 = 4.2;

    // Unevaluated expression node; binding it to auto&& keeps the temporary alive.
    auto && intermediate = x1 + x2;

    // Does not compile when enabled: `intermediate + x3` has a different
    // (larger) type than `intermediate`, so it cannot be assigned back to it.
    //
    //     for (int i = 0; i< 10; i++)
    //     {
    //         intermediate = intermediate + x3;
    //     }

    // The expression graph would be inspected here, at the point where it is
    // evaluated into a concrete Vector.
    Vector<double> result = intermediate + x2;

    result.print();
}

事实上,在我的最终设计中,我想写以下内容:

   // Wished-for usage (fragment, not a complete program).
   Vector<double> x1(10);
Vector<double> x2(10);
Vector<double> x3(10);

x1 = 7.5;
x2 = 8.;
x3 = 4.2;

// `intermediate` is a plain Vector here, so with the Vector class from the
// listing above each assignment evaluates the sum immediately.
Vector<double> intermediate = x1 + x2;
for (int i = 0; i < 5; ++i)
intermediate = intermediate + x3;

Vector<double> result = x1 + x3 + intermediate;
// Goal: `result` should end up holding the whole expression tree, and
// evaluate() should perform the graph inspection.
// NOTE(review): the Vector class in the listing above declares no evaluate().
result.evaluate();

提前致谢乔纳森

最佳答案

恐怕这行不通,因为这种链式技术依赖于变量 intermediate 的类型来捕获整个表达式,所以它的类型类似于 Sum<Mult<Vector,Vector>>(此处已简化)。但是变量的类型不能在 for 循环的每次迭代中改变。

我看到两种替代方案:

不要将表达式捕获为类型,而是捕获为运行时结构,类型比方说 VectorExpression。这会对性能产生影响,因为您必须在运行时分析表达式图并限制您可以进行的优化种类。

第二种选择是使用模板元编程编写您自己的 for 循环(每一步都有一个新类型)。

下面是折叠(fold)函数的例子(这正是你想要的)。我们必须使用折叠仿函数(类模板),因为 C++ 不支持函数模板的偏特化。注意:下面的代码使用了 decltype(auto) 和泛型 lambda,需要 C++14:

#include <utility>

/// Compile-time fold: applies `f` to a value `N` times, letting the value's
/// type change at every step.
///
/// Implemented as a class template because the recursion is terminated by the
/// partial specialisation for `N == 0` below.
///
/// @tparam N  number of applications still to perform
/// @tparam V  type of the current value
/// @tparam F  callable type; may be an lvalue-reference type, in which case
///            `F&&` collapses to that lvalue reference
template <int N, class V, class F>
struct foldf {
    /// @param v  current value
    /// @param f  callable invoked as `f(v)`
    /// @return   the result of applying `f` N times, starting from `v`
    auto operator()(V v, F&& f) -> decltype(auto) {
        auto next = f(v);
        // std::forward (not std::move) keeps lvalue callables usable: with
        // F = L&, an rvalue could not bind to the next step's `L&` parameter.
        return foldf<N - 1, decltype(next), F>()(std::move(next), std::forward<F>(f));
    }
};

/// Recursion end: zero applications left, so the value is returned unchanged.
template <class V, class F>
struct foldf<0, V, F> {
    auto operator()(V v, F&&) -> decltype(auto) {
        return v;
    }
};

/// Empty tag type whose template argument carries the repetition count `N`.
/// It only exists to make calls simpler: passing `Repeat<N>()` as an ordinary
/// argument lets `N` be deduced.
template <int N>
class Repeat {
};

/// Applies `f` to `v` exactly `N` times and returns the final value, whose
/// type may differ from `V` if `f` changes the type at each step.
///
/// @param tag  carries `N` in its type; its value is not used
/// @param v    starting value
/// @param f    callable invoked once per step; lvalues and rvalues are both accepted
template <int N, class V, class F>
auto fold(Repeat<N> tag, V v, F&& f) -> decltype(auto) {
    (void)tag;  // only the type matters
    // `f` is a forwarding reference, so forward it: std::move would turn an
    // lvalue callable into an rvalue that cannot bind to foldf's `F&&` (== L&).
    return foldf<N, V, F>()(std::move(v), std::forward<F>(f));
}

为了证明它做了我们想要的,让我们添加这段代码:

/// Empty marker template: the nesting depth of `Test<...>` in a type records
/// how many times a value has been wrapped.
template <class T>
class Test {};

/// Empty, unrelated type with no conversions from anything else.
class Other {
};

template <class T>
auto wrap(T t) -> decltype(auto) {
return Test<T>();
}

// Demonstration only: this function is expected NOT to compile.
int main() {
// Apply `wrap` three times, starting from the int 0; every step changes the type.
auto v = fold(Repeat<3>(), 0, [](auto t){
return wrap(t);
});
// Deliberately ill-formed: nothing converts to Other, so the compiler's
// diagnostic spells out the actual type of `v`.
Other x = v;
}

结果应该是tmp.cpp:42:11: error: no viable conversion from 'Test<Test<Test<int> > >' to 'Other' , 这表明类型已保留。

关于c++ - 表达式模板 : unroll loop,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/46381692/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com