// Timing driver for the benchVec() overloads declared below.
//
// Configuration visible in this file:
//   - SIZE   defaults to 50 and REPEAT defaults to 10000 unless already defined.
//   - Scalar is float.
//   - benchVec() is declared three times, each marked noinline: for raw Scalar
//     pointers plus an element count, for MatrixXf references, and for VectorXf
//     references.
//
// What main() visibly does:
//   - Computes size = SIZE * 8 and size2 = size * size.
//   - Obtains three buffers a, b, c from internal::aligned_new(). The request for
//     b is 4 elements larger and the returned pointer is advanced by one element
//     (presumably to make b an unaligned operand; TODO confirm).
//   - For every innersize (counting down) that divides size2 evenly, builds
//     innersize x outersize objects ma/mb/mc (declared as MatrixXf, initialised
//     from Map(...)), times three start()/stop() rounds of benchVec(ma, mb, mc),
//     and prints the dimensions, timer.value() and a throughput figure.
//   - Repeats the same three-round timing for VectorXf va/vb/vc of length size2.
//   - The printed throughput is double(size2*REPEAT) / timer.value() divided by
//     1024^3 (not 10^9) and labelled "GFlops".
//
// NOTE(review): this copy of the file looks damaged and should be checked against
// the original before any code change is made:
//   - the three #include directives carry no header names;
//   - template argument lists appear to be missing, e.g. internal::aligned_new(size2),
//     Map(a, ...), internal::packet_traits::size;
//   - "for (int i=0; i2 ; --innersize)" and "for (int k=0; k::type PacketScalar;"
//     look like two statements fused together, so the buffer initialisation, the
//     timer declaration, the start of the innersize loop and the benchVec bodies
//     are not visible here;
//   - the file ends in the middle of a for statement.
// Presumably everything between a '<' and a later '>' was stripped; verify.
#include #include #include using namespace StormEigen; #ifndef SIZE #define SIZE 50 #endif #ifndef REPEAT #define REPEAT 10000 #endif typedef float Scalar; __attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size); __attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c); __attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c); int main(int argc, char* argv[]) { int size = SIZE * 8; int size2 = size * size; Scalar* a = internal::aligned_new(size2); Scalar* b = internal::aligned_new(size2+4)+1; Scalar* c = internal::aligned_new(size2); for (int i=0; i2 ; --innersize) { if (size2%innersize==0) { int outersize = size2/innersize; MatrixXf ma = Map(a, innersize, outersize ); MatrixXf mb = Map(b, innersize, outersize ); MatrixXf mc = Map(c, innersize, outersize ); timer.reset(); for (int k=0; k<3; ++k) { timer.start(); benchVec(ma, mb, mc); timer.stop(); } std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; } } VectorXf va = Map(a, size2); VectorXf vb = Map(b, size2); VectorXf vc = Map(c, size2); timer.reset(); for (int k=0; k<3; ++k) { timer.start(); benchVec(va, vb, vc); timer.stop(); } std::cout << timer.value() << "s " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; return 0; } void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) { for (int k=0; k::type PacketScalar; const int PacketSize = internal::packet_traits::size; PacketScalar a0, a1, a2, a3, b0, b1, b2, b3; for (int k=0; k