219 lines
6.2 KiB

  1. // g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
  2. // possible options:
  3. // -DSTORMEIGEN_DONT_VECTORIZE
  4. // -msse2
  5. // #define STORMEIGEN_DEFAULT_TO_ROW_MAJOR
  6. #define _FLOAT
  7. #include <iostream>
  8. #include <StormEigen/Core>
  9. #include "BenchTimer.h"
  10. // include the BLAS headers
  11. extern "C" {
  12. #include <cblas.h>
  13. }
  14. #include <string>
  15. #ifdef _FLOAT
  16. typedef float Scalar;
  17. #define CBLAS_GEMM cblas_sgemm
  18. #else
  19. typedef double Scalar;
  20. #define CBLAS_GEMM cblas_dgemm
  21. #endif
  22. typedef StormEigen::Matrix<Scalar,StormEigen::Dynamic,Eigen::Dynamic> MyMatrix;
  23. void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
  24. void check_product(int M, int N, int K);
  25. void check_product(void);
  26. int main(int argc, char *argv[])
  27. {
  28. // disable SSE exceptions
  29. #ifdef __GNUC__
  30. {
  31. int aux;
  32. asm(
  33. "stmxcsr %[aux] \n\t"
  34. "orl $32832, %[aux] \n\t"
  35. "ldmxcsr %[aux] \n\t"
  36. : : [aux] "m" (aux));
  37. }
  38. #endif
  39. int nbtries=1, nbloops=1, M, N, K;
  40. if (argc==2)
  41. {
  42. if (std::string(argv[1])=="check")
  43. check_product();
  44. else
  45. M = N = K = atoi(argv[1]);
  46. }
  47. else if ((argc==3) && (std::string(argv[1])=="auto"))
  48. {
  49. M = N = K = atoi(argv[2]);
  50. nbloops = 1000000000/(M*M*M);
  51. if (nbloops<1)
  52. nbloops = 1;
  53. nbtries = 6;
  54. }
  55. else if (argc==4)
  56. {
  57. M = N = K = atoi(argv[1]);
  58. nbloops = atoi(argv[2]);
  59. nbtries = atoi(argv[3]);
  60. }
  61. else if (argc==6)
  62. {
  63. M = atoi(argv[1]);
  64. N = atoi(argv[2]);
  65. K = atoi(argv[3]);
  66. nbloops = atoi(argv[4]);
  67. nbtries = atoi(argv[5]);
  68. }
  69. else
  70. {
  71. std::cout << "Usage: " << argv[0] << " size \n";
  72. std::cout << "Usage: " << argv[0] << " auto size\n";
  73. std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
  74. std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
  75. std::cout << "Usage: " << argv[0] << " check\n";
  76. std::cout << "Options:\n";
  77. std::cout << " size unique size of the 2 matrices (integer)\n";
  78. std::cout << " auto automatically set the number of repetitions and tries\n";
  79. std::cout << " nbloops number of times the GEMM routines is executed\n";
  80. std::cout << " nbtries number of times the loop is benched (return the best try)\n";
  81. std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n";
  82. std::cout << " check check eigen product using cblas as a reference\n";
  83. exit(1);
  84. }
  85. double nbmad = double(M) * double(N) * double(K) * double(nbloops);
  86. if (!(std::string(argv[1])=="auto"))
  87. std::cout << M << " x " << N << " x " << K << "\n";
  88. Scalar alpha, beta;
  89. MyMatrix ma(M,K), mb(K,N), mc(M,N);
  90. ma = MyMatrix::Random(M,K);
  91. mb = MyMatrix::Random(K,N);
  92. mc = MyMatrix::Random(M,N);
  93. StormEigen::BenchTimer timer;
  94. // we simply compute c += a*b, so:
  95. alpha = 1;
  96. beta = 1;
  97. // bench cblas
  98. // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
  99. if (!(std::string(argv[1])=="auto"))
  100. {
  101. timer.reset();
  102. for (uint k=0 ; k<nbtries ; ++k)
  103. {
  104. timer.start();
  105. for (uint j=0 ; j<nbloops ; ++j)
  106. #ifdef STORMEIGEN_DEFAULT_TO_ROW_MAJOR
  107. CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
  108. #else
  109. CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
  110. #endif
  111. timer.stop();
  112. }
  113. if (!(std::string(argv[1])=="auto"))
  114. std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
  115. else
  116. std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
  117. }
  118. // clear
  119. ma = MyMatrix::Random(M,K);
  120. mb = MyMatrix::Random(K,N);
  121. mc = MyMatrix::Random(M,N);
  122. // eigen
  123. // if (!(std::string(argv[1])=="auto"))
  124. {
  125. timer.reset();
  126. for (uint k=0 ; k<nbtries ; ++k)
  127. {
  128. timer.start();
  129. bench_eigengemm(mc, ma, mb, nbloops);
  130. timer.stop();
  131. }
  132. if (!(std::string(argv[1])=="auto"))
  133. std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
  134. else
  135. std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
  136. }
  137. std::cout << "l1: " << StormEigen::l1CacheSize() << std::endl;
  138. std::cout << "l2: " << StormEigen::l2CacheSize() << std::endl;
  139. return 0;
  140. }
  141. using namespace StormEigen;
  142. void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
  143. {
  144. for (uint j=0 ; j<nbloops ; ++j)
  145. mc.noalias() += ma * mb;
  146. }
  147. #define MYVERIFY(A,M) if (!(A)) { \
  148. std::cout << "FAIL: " << M << "\n"; \
  149. }
  150. void check_product(int M, int N, int K)
  151. {
  152. MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
  153. ma = MyMatrix::Random(M,K);
  154. mb = MyMatrix::Random(K,N);
  155. maT = ma.transpose();
  156. mbT = mb.transpose();
  157. mc = MyMatrix::Random(M,N);
  158. MyMatrix::Scalar eps = 1e-4;
  159. meigen = mref = mc;
  160. CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
  161. meigen += ma * mb;
  162. MYVERIFY(meigen.isApprox(mref, eps),". * .");
  163. meigen = mref = mc;
  164. CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
  165. meigen += maT.transpose() * mb;
  166. MYVERIFY(meigen.isApprox(mref, eps),"T * .");
  167. meigen = mref = mc;
  168. CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
  169. meigen += (maT.transpose()) * (mbT.transpose());
  170. MYVERIFY(meigen.isApprox(mref, eps),"T * T");
  171. meigen = mref = mc;
  172. CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
  173. meigen += ma * mbT.transpose();
  174. MYVERIFY(meigen.isApprox(mref, eps),". * T");
  175. }
  176. void check_product(void)
  177. {
  178. int M, N, K;
  179. for (uint i=0; i<1000; ++i)
  180. {
  181. M = internal::random<int>(1,64);
  182. N = internal::random<int>(1,768);
  183. K = internal::random<int>(1,768);
  184. M = (0 + M) * 1;
  185. std::cout << M << " x " << N << " x " << K << "\n";
  186. check_product(M, N, K);
  187. }
  188. }