You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							219 lines
						
					
					
						
							6.2 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							219 lines
						
					
					
						
							6.2 KiB
						
					
					
				
								// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
							 | 
						|
								// possible options:
							 | 
						|
								//    -DSTORMEIGEN_DONT_VECTORIZE
							 | 
						|
								//    -msse2
							 | 
						|
								
							 | 
						|
								// #define STORMEIGEN_DEFAULT_TO_ROW_MAJOR
							 | 
						|
								#define _FLOAT
							 | 
						|
								
							 | 
						|
								#include <iostream>
							 | 
						|
								
							 | 
						|
								#include <StormEigen/Core>
							 | 
						|
								#include "BenchTimer.h"
							 | 
						|
								
							 | 
						|
								// include the BLAS headers
							 | 
						|
								extern "C" {
							 | 
						|
								#include <cblas.h>
							 | 
						|
								}
							 | 
						|
								#include <string>
							 | 
						|
								
							 | 
						|
								#ifdef _FLOAT
							 | 
						|
								typedef float Scalar;
							 | 
						|
								#define CBLAS_GEMM cblas_sgemm
							 | 
						|
								#else
							 | 
						|
								typedef double Scalar;
							 | 
						|
								#define CBLAS_GEMM cblas_dgemm
							 | 
						|
								#endif
							 | 
						|
								
							 | 
						|
								
							 | 
						|
								typedef StormEigen::Matrix<Scalar,StormEigen::Dynamic,Eigen::Dynamic> MyMatrix;
							 | 
						|
								void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
							 | 
						|
								void check_product(int M, int N, int K);
							 | 
						|
								void check_product(void);
							 | 
						|
								
							 | 
						|
								int main(int argc, char *argv[])
							 | 
						|
								{
							 | 
						|
								  // disable SSE exceptions
							 | 
						|
								  #ifdef __GNUC__
							 | 
						|
								  {
							 | 
						|
								    int aux;
							 | 
						|
								    asm(
							 | 
						|
								    "stmxcsr   %[aux]           \n\t"
							 | 
						|
								    "orl       $32832, %[aux]   \n\t"
							 | 
						|
								    "ldmxcsr   %[aux]           \n\t"
							 | 
						|
								    : : [aux] "m" (aux));
							 | 
						|
								  }
							 | 
						|
								  #endif
							 | 
						|
								
							 | 
						|
								  int nbtries=1, nbloops=1, M, N, K;
							 | 
						|
								
							 | 
						|
								  if (argc==2)
							 | 
						|
								  {
							 | 
						|
								    if (std::string(argv[1])=="check")
							 | 
						|
								      check_product();
							 | 
						|
								    else
							 | 
						|
								      M = N = K = atoi(argv[1]);
							 | 
						|
								  }
							 | 
						|
								  else if ((argc==3) && (std::string(argv[1])=="auto"))
							 | 
						|
								  {
							 | 
						|
								    M = N = K = atoi(argv[2]);
							 | 
						|
								    nbloops = 1000000000/(M*M*M);
							 | 
						|
								    if (nbloops<1)
							 | 
						|
								      nbloops = 1;
							 | 
						|
								    nbtries = 6;
							 | 
						|
								  }
							 | 
						|
								  else if (argc==4)
							 | 
						|
								  {
							 | 
						|
								    M = N = K = atoi(argv[1]);
							 | 
						|
								    nbloops = atoi(argv[2]);
							 | 
						|
								    nbtries = atoi(argv[3]);
							 | 
						|
								  }
							 | 
						|
								  else if (argc==6)
							 | 
						|
								  {
							 | 
						|
								    M = atoi(argv[1]);
							 | 
						|
								    N = atoi(argv[2]);
							 | 
						|
								    K = atoi(argv[3]);
							 | 
						|
								    nbloops = atoi(argv[4]);
							 | 
						|
								    nbtries = atoi(argv[5]);
							 | 
						|
								  }
							 | 
						|
								  else
							 | 
						|
								  {
							 | 
						|
								    std::cout << "Usage: " << argv[0] << " size  \n";
							 | 
						|
								    std::cout << "Usage: " << argv[0] << " auto size\n";
							 | 
						|
								    std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
							 | 
						|
								    std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
							 | 
						|
								    std::cout << "Usage: " << argv[0] << " check\n";
							 | 
						|
								    std::cout << "Options:\n";
							 | 
						|
								    std::cout << "    size       unique size of the 2 matrices (integer)\n";
							 | 
						|
								    std::cout << "    auto       automatically set the number of repetitions and tries\n";
							 | 
						|
								    std::cout << "    nbloops    number of times the GEMM routines is executed\n";
							 | 
						|
								    std::cout << "    nbtries    number of times the loop is benched (return the best try)\n";
							 | 
						|
								    std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN (integers)\n";
							 | 
						|
								    std::cout << "    check      check eigen product using cblas as a reference\n";
							 | 
						|
								    exit(1);
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  double nbmad = double(M) * double(N) * double(K) * double(nbloops);
							 | 
						|
								
							 | 
						|
								  if (!(std::string(argv[1])=="auto"))
							 | 
						|
								    std::cout << M << " x " << N << " x " << K << "\n";
							 | 
						|
								
							 | 
						|
								  Scalar alpha, beta;
							 | 
						|
								  MyMatrix ma(M,K), mb(K,N), mc(M,N);
							 | 
						|
								  ma = MyMatrix::Random(M,K);
							 | 
						|
								  mb = MyMatrix::Random(K,N);
							 | 
						|
								  mc = MyMatrix::Random(M,N);
							 | 
						|
								
							 | 
						|
								  StormEigen::BenchTimer timer;
							 | 
						|
								
							 | 
						|
								  // we simply compute c += a*b, so:
							 | 
						|
								  alpha = 1;
							 | 
						|
								  beta = 1;
							 | 
						|
								
							 | 
						|
								  // bench cblas
							 | 
						|
								  // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
							 | 
						|
								  if (!(std::string(argv[1])=="auto"))
							 | 
						|
								  {
							 | 
						|
								    timer.reset();
							 | 
						|
								    for (uint k=0 ; k<nbtries ; ++k)
							 | 
						|
								    {
							 | 
						|
								        timer.start();
							 | 
						|
								        for (uint j=0 ; j<nbloops ; ++j)
							 | 
						|
								              #ifdef STORMEIGEN_DEFAULT_TO_ROW_MAJOR
							 | 
						|
								              CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
							 | 
						|
								              #else
							 | 
						|
								              CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
							 | 
						|
								              #endif
							 | 
						|
								        timer.stop();
							 | 
						|
								    }
							 | 
						|
								    if (!(std::string(argv[1])=="auto"))
							 | 
						|
								      std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
							 | 
						|
								    else
							 | 
						|
								        std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  // clear
							 | 
						|
								  ma = MyMatrix::Random(M,K);
							 | 
						|
								  mb = MyMatrix::Random(K,N);
							 | 
						|
								  mc = MyMatrix::Random(M,N);
							 | 
						|
								
							 | 
						|
								  // eigen
							 | 
						|
								//   if (!(std::string(argv[1])=="auto"))
							 | 
						|
								  {
							 | 
						|
								      timer.reset();
							 | 
						|
								      for (uint k=0 ; k<nbtries ; ++k)
							 | 
						|
								      {
							 | 
						|
								          timer.start();
							 | 
						|
								          bench_eigengemm(mc, ma, mb, nbloops);
							 | 
						|
								          timer.stop();
							 | 
						|
								      }
							 | 
						|
								      if (!(std::string(argv[1])=="auto"))
							 | 
						|
								        std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
							 | 
						|
								      else
							 | 
						|
								        std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  std::cout << "l1: " << StormEigen::l1CacheSize() << std::endl;
							 | 
						|
								  std::cout << "l2: " << StormEigen::l2CacheSize() << std::endl;
							 | 
						|
								  
							 | 
						|
								
							 | 
						|
								  return 0;
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								using namespace StormEigen;
							 | 
						|
								
							 | 
						|
								void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
							 | 
						|
								{
							 | 
						|
								  for (uint j=0 ; j<nbloops ; ++j)
							 | 
						|
								      mc.noalias() += ma * mb;
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								#define MYVERIFY(A,M) if (!(A)) { \
							 | 
						|
								    std::cout << "FAIL: " << M << "\n"; \
							 | 
						|
								  }
							 | 
						|
								void check_product(int M, int N, int K)
							 | 
						|
								{
							 | 
						|
								  MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
							 | 
						|
								  ma = MyMatrix::Random(M,K);
							 | 
						|
								  mb = MyMatrix::Random(K,N);
							 | 
						|
								  maT = ma.transpose();
							 | 
						|
								  mbT = mb.transpose();
							 | 
						|
								  mc = MyMatrix::Random(M,N);
							 | 
						|
								
							 | 
						|
								  MyMatrix::Scalar eps = 1e-4;
							 | 
						|
								
							 | 
						|
								  meigen = mref = mc;
							 | 
						|
								  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
							 | 
						|
								  meigen += ma * mb;
							 | 
						|
								  MYVERIFY(meigen.isApprox(mref, eps),". * .");
							 | 
						|
								
							 | 
						|
								  meigen = mref = mc;
							 | 
						|
								  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
							 | 
						|
								  meigen += maT.transpose() * mb;
							 | 
						|
								  MYVERIFY(meigen.isApprox(mref, eps),"T * .");
							 | 
						|
								
							 | 
						|
								  meigen = mref = mc;
							 | 
						|
								  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
							 | 
						|
								  meigen += (maT.transpose()) * (mbT.transpose());
							 | 
						|
								  MYVERIFY(meigen.isApprox(mref, eps),"T * T");
							 | 
						|
								
							 | 
						|
								  meigen = mref = mc;
							 | 
						|
								  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
							 | 
						|
								  meigen += ma * mbT.transpose();
							 | 
						|
								  MYVERIFY(meigen.isApprox(mref, eps),". * T");
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								void check_product(void)
							 | 
						|
								{
							 | 
						|
								  int M, N, K;
							 | 
						|
								  for (uint i=0; i<1000; ++i)
							 | 
						|
								  {
							 | 
						|
								    M = internal::random<int>(1,64);
							 | 
						|
								    N = internal::random<int>(1,768);
							 | 
						|
								    K = internal::random<int>(1,768);
							 | 
						|
								    M = (0 + M) * 1;
							 | 
						|
								    std::cout << M << " x " << N << " x " << K << "\n";
							 | 
						|
								    check_product(M, N, K);
							 | 
						|
								  }
							 | 
						|
								}
							 | 
						|
								
							 |