You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							135 lines
						
					
					
						
							5.1 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							135 lines
						
					
					
						
							5.1 KiB
						
					
					
				| 
 | |
| #include <iostream> | |
| #include <Eigen/Core> | |
| #include <bench/BenchTimer.h> | |
| using namespace Eigen; | |
| 
 | |
// Base problem size. main() derives its working dimension as SIZE * 8.
// The guard lets the build predefine SIZE to override the default.
#if !defined(SIZE)
#  define SIZE 50
#endif

// Number of times each benchVec overload repeats its addition.
// The guard lets the build predefine REPEAT to override the default.
#if !defined(REPEAT)
#  define REPEAT 10000
#endif

// Element type of the raw buffers used by the pointer-based benchmark.
typedef float Scalar;
| 
 | |
| __attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size); | |
| __attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c); | |
| __attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c); | |
| 
 | |
| int main(int argc, char* argv[]) | |
| { | |
|     int size = SIZE * 8; | |
|     int size2 = size * size; | |
|     Scalar* a = internal::aligned_new<Scalar>(size2); | |
|     Scalar* b = internal::aligned_new<Scalar>(size2+4)+1; | |
|     Scalar* c = internal::aligned_new<Scalar>(size2);  | |
|      | |
|     for (int i=0; i<size; ++i) | |
|     { | |
|         a[i] = b[i] = c[i] = 0; | |
|     } | |
|      | |
|     BenchTimer timer; | |
|      | |
|     timer.reset(); | |
|     for (int k=0; k<10; ++k) | |
|     { | |
|         timer.start(); | |
|         benchVec(a, b, c, size2); | |
|         timer.stop(); | |
|     } | |
|     std::cout << timer.value() << "s  " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; | |
|     return 0; | |
|     for (int innersize = size; innersize>2 ; --innersize) | |
|     { | |
|         if (size2%innersize==0) | |
|         { | |
|             int outersize = size2/innersize; | |
|             MatrixXf ma = Map<MatrixXf>(a, innersize, outersize ); | |
|             MatrixXf mb = Map<MatrixXf>(b, innersize, outersize ); | |
|             MatrixXf mc = Map<MatrixXf>(c, innersize, outersize ); | |
|             timer.reset(); | |
|             for (int k=0; k<3; ++k) | |
|             { | |
|                 timer.start(); | |
|                 benchVec(ma, mb, mc); | |
|                 timer.stop(); | |
|             } | |
|             std::cout << innersize << " x " << outersize << "  " << timer.value() << "s   " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; | |
|         } | |
|     } | |
|      | |
|     VectorXf va = Map<VectorXf>(a, size2); | |
|     VectorXf vb = Map<VectorXf>(b, size2); | |
|     VectorXf vc = Map<VectorXf>(c, size2); | |
|     timer.reset(); | |
|     for (int k=0; k<3; ++k) | |
|     { | |
|         timer.start(); | |
|         benchVec(va, vb, vc); | |
|         timer.stop(); | |
|     } | |
|     std::cout << timer.value() << "s   " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n"; | |
| 
 | |
|     return 0; | |
| } | |
| 
 | |
| void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c) | |
| { | |
|     for (int k=0; k<REPEAT; ++k) | |
|         a = a + b; | |
| } | |
| 
 | |
| void benchVec(VectorXf& a, VectorXf& b, VectorXf& c) | |
| { | |
|     for (int k=0; k<REPEAT; ++k) | |
|         a = a + b; | |
| } | |
| 
 | |
| void benchVec(Scalar* a, Scalar* b, Scalar* c, int size) | |
| { | |
|     typedef internal::packet_traits<Scalar>::type PacketScalar; | |
|     const int PacketSize = internal::packet_traits<Scalar>::size; | |
|     PacketScalar a0, a1, a2, a3, b0, b1, b2, b3; | |
|     for (int k=0; k<REPEAT; ++k) | |
|         for (int i=0; i<size; i+=PacketSize*8) | |
|         { | |
| //             a0 = internal::pload(&a[i]); | |
| //             b0 = internal::pload(&b[i]); | |
| //             a1 = internal::pload(&a[i+1*PacketSize]); | |
| //             b1 = internal::pload(&b[i+1*PacketSize]); | |
| //             a2 = internal::pload(&a[i+2*PacketSize]); | |
| //             b2 = internal::pload(&b[i+2*PacketSize]); | |
| //             a3 = internal::pload(&a[i+3*PacketSize]); | |
| //             b3 = internal::pload(&b[i+3*PacketSize]); | |
| //             internal::pstore(&a[i], internal::padd(a0, b0)); | |
| //             a0 = internal::pload(&a[i+4*PacketSize]); | |
| //             b0 = internal::pload(&b[i+4*PacketSize]); | |
| //              | |
| //             internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1)); | |
| //             a1 = internal::pload(&a[i+5*PacketSize]); | |
| //             b1 = internal::pload(&b[i+5*PacketSize]); | |
| //              | |
| //             internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2)); | |
| //             a2 = internal::pload(&a[i+6*PacketSize]); | |
| //             b2 = internal::pload(&b[i+6*PacketSize]); | |
| //              | |
| //             internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3)); | |
| //             a3 = internal::pload(&a[i+7*PacketSize]); | |
| //             b3 = internal::pload(&b[i+7*PacketSize]); | |
| //              | |
| //             internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0)); | |
| //             internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1)); | |
| //             internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2)); | |
| //             internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3)); | |
|              | |
|             internal::pstore(&a[i+2*PacketSize], internal::padd(internal::ploadu(&a[i+2*PacketSize]), internal::ploadu(&b[i+2*PacketSize]))); | |
|             internal::pstore(&a[i+3*PacketSize], internal::padd(internal::ploadu(&a[i+3*PacketSize]), internal::ploadu(&b[i+3*PacketSize]))); | |
|             internal::pstore(&a[i+4*PacketSize], internal::padd(internal::ploadu(&a[i+4*PacketSize]), internal::ploadu(&b[i+4*PacketSize]))); | |
|             internal::pstore(&a[i+5*PacketSize], internal::padd(internal::ploadu(&a[i+5*PacketSize]), internal::ploadu(&b[i+5*PacketSize]))); | |
|             internal::pstore(&a[i+6*PacketSize], internal::padd(internal::ploadu(&a[i+6*PacketSize]), internal::ploadu(&b[i+6*PacketSize]))); | |
|             internal::pstore(&a[i+7*PacketSize], internal::padd(internal::ploadu(&a[i+7*PacketSize]), internal::ploadu(&b[i+7*PacketSize]))); | |
|         } | |
| }
 |