You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

400 lines
12 KiB

  1. /*
  2. * Copyright (C) 2015 Christopher Gilbert.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a copy
  5. * of this software and associated documentation files (the "Software"), to deal
  6. * in the Software without restriction, including without limitation the rights
  7. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8. * copies of the Software, and to permit persons to whom the Software is
  9. * furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in all
  12. * copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20. * SOFTWARE.
  21. */
  22. #ifndef BENCHPRESS_HPP
  23. #define BENCHPRESS_HPP
  24. #include <algorithm> // max, min
  25. #include <atomic> // atomic_intmax_t
  26. #include <chrono> // high_resolution_clock, duration
  27. #include <functional> // function
  28. #include <iomanip> // setw
  29. #include <iostream> // cout
  30. #include <regex> // regex, regex_match
  31. #include <sstream> // stringstream
  32. #include <string> // string
  33. #include <thread> // thread
  34. #include <vector> // vector
  35. namespace benchpress {
  36. /*
  37. * The options class encapsulates all options for running benchmarks.
  38. *
  39. * When including benchpress, a main function can be emitted which includes a command-line parser for building an
  40. * options object. However, a developer may occasionally need to write their own main stub and construct the
  41. * options object manually:
  42. *
  43. * options opts;
  44. * opts
  45. * .bench(".*")
  46. * .benchtime(1)
  47. * .cpu(4);
  48. */
  /*
   * Run configuration for benchmarks, built through chainable setters.
   *
   * Defaults: every benchmark is selected (".*"), a benchtime of 1, and one thread per hardware thread reported by
   * the standard library (never fewer than one).
   */
  class options {
      std::string d_bench;     // regular expression selecting which benchmark names are run
      size_t d_benchtime;      // benchmark time; context converts this value to std::chrono::seconds
      size_t d_cpu;            // number of threads used by parallel benchmarks
  public:
      options()
          : d_bench(".*")
          , d_benchtime(1)
          // hardware_concurrency() returns 0 when the value is not computable; a thread count of 0 would make
          // parallel benchmarks run no work at all, so fall back to a single thread.
          , d_cpu(std::max<size_t>(1, std::thread::hardware_concurrency()))
      {}

      // Sets the benchmark name filter (a regular expression). Returns *this for chaining.
      options& bench(const std::string& bench) {
          d_bench = bench;
          return *this;
      }

      // Sets the benchmark time. Returns *this for chaining.
      options& benchtime(size_t benchtime) {
          d_benchtime = benchtime;
          return *this;
      }

      // Sets the number of threads for parallel benchmarks. The value is stored as given. Returns *this for chaining.
      options& cpu(size_t cpu) {
          d_cpu = cpu;
          return *this;
      }

      // Returns a copy of the benchmark name filter.
      std::string get_bench() const {
          return d_bench;
      }

      // Returns the configured benchmark time.
      size_t get_benchtime() const {
          return d_benchtime;
      }

      // Returns the configured thread count.
      size_t get_cpu() const {
          return d_cpu;
      }
  };
  class context;

  /*
   * benchmark_info pairs a benchmark's name with the callable that implements it.
   *
   * benchmark_info bi("example", [](benchpress::context* b) {
   *     // benchmark function
   * });
   */
  class benchmark_info {
      // Signature every benchmark body must be callable with.
      using callback_type = std::function<void(context*)>;

      std::string d_name;
      callback_type d_func;
  public:
      // Stores copies of the given name and callable.
      benchmark_info(std::string name, std::function<void(context*)> func)
          : d_name(name), d_func(func) {}

      // Returns a copy of the benchmark name.
      std::string get_name() const {
          return d_name;
      }

      // Returns a copy of the benchmark callable.
      std::function<void(context*)> get_func() const {
          return d_func;
      }
  };
  100. /*
  101. * The registration class is responsible for providing a single global point of reference for registering
  102. * benchmark functions.
  103. *
  104. * registration::get_ptr()->register_benchmark(info);
  105. */
  106. class registration {
  107. static registration* d_this;
  108. std::vector<benchmark_info> d_benchmarks;
  109. public:
  110. static registration* get_ptr() {
  111. if (nullptr == d_this) {
  112. d_this = new registration();
  113. }
  114. return d_this;
  115. }
  116. void register_benchmark(benchmark_info& info) {
  117. d_benchmarks.push_back(info);
  118. }
  119. std::vector<benchmark_info> get_benchmarks() { return d_benchmarks; }
  120. };
  121. /*
  122. * The auto_register class is a helper used to register benchmarks.
  123. */
  124. class auto_register {
  125. public:
  126. auto_register(const std::string& name, std::function<void(context*)> func) {
  127. benchmark_info info(name, func);
  128. registration::get_ptr()->register_benchmark(info);
  129. }
  130. };
  // Token-pasting helpers. CONCAT2 adds one level of indirection so that arguments such as __LINE__ are
  // macro-expanded before they are pasted.
  #define CONCAT(x, y) x ## y
  #define CONCAT2(x, y) CONCAT(x, y)
  // The BENCHMARK macro is a helper for creating benchmark functions and automatically registering them with the
  // registration class. It defines an auto_register object named register_<line>. The object is static (internal
  // linkage) so that benchmarks placed on the same line number of different source files do not produce
  // duplicate-symbol link errors. Two BENCHMARK uses on the same line of one file still collide.
  #define BENCHMARK(x, f) static benchpress::auto_register CONCAT2(register_, __LINE__)((x), (f));
  // Emits an empty inline-assembly statement, intended to keep the compiler from removing a redundant code path
  // which has no side-effects.
  #define DISABLE_REDUNDANT_CODE_OPT() { asm(""); }
  138. /*
  139. * The result class is responsible for producing a printable string representation of a benchmark run.
  140. */
  /*
   * Outcome of one benchmark run: how many iterations were executed, how long they took in total, and how many
   * bytes each iteration processed (0 when no byte count was set).
   */
  class result {
      size_t d_num_iterations;
      std::chrono::nanoseconds d_duration;
      size_t d_num_bytes;
  public:
      result(size_t num_iterations, std::chrono::nanoseconds duration, size_t num_bytes)
          : d_num_iterations(num_iterations)
          , d_duration(duration)
          , d_num_bytes(num_bytes)
      {}

      // Average time per iteration in whole nanoseconds; 0 when no iterations were run.
      size_t get_ns_per_op() const {
          if (d_num_iterations == 0) {
              return 0;
          }
          return d_duration.count() / d_num_iterations;
      }

      // Throughput in megabytes (1e6 bytes) per second; 0 when iterations, duration or byte count is not positive.
      double get_mb_per_s() const {
          if (d_num_iterations == 0 || d_duration.count() <= 0 || d_num_bytes == 0) {
              return 0;
          }
          // Convert to fractional seconds. Casting to std::chrono::seconds truncates, which turned every run
          // shorter than one second into a division by zero and skewed all other results.
          const double elapsed_s = std::chrono::duration<double>(d_duration).count();
          const double megabytes = double(d_num_bytes) * double(d_num_iterations) / double(1e6);
          return megabytes / elapsed_s;
      }

      // Formats the result as right-aligned 12-character columns: iteration count, then ns/op, then MB/s
      // (the MB/s column is only emitted when the throughput is positive).
      std::string to_string() const {
          std::stringstream tmp;
          tmp << std::setw(12) << std::right << d_num_iterations;
          tmp << std::setw(12) << std::right << get_ns_per_op() << " ns/op";
          const double mbs = get_mb_per_s();
          if (mbs > 0.0) {
              tmp << std::setw(12) << std::right << mbs << " MB/s";
          }
          return tmp.str();
      }
  };
  176. /*
  177. * The parallel_context class is responsible for providing a thread-safe context for parallel benchmark code.
  178. */
  /*
   * Iteration budget handed to the body of a parallel benchmark. The remaining count is held in an atomic
   * integer, and each call to next() consumes one unit of it.
   */
  class parallel_context {
      std::atomic_intmax_t d_num_iterations;
  public:
      parallel_context(size_t num_iterations)
          : d_num_iterations(num_iterations) {}

      // Atomically decrements the remaining count and reports whether it was still positive before the decrement.
      bool next() {
          const auto remaining_before = d_num_iterations--;
          return remaining_before > 0;
      }
  };
  189. /*
  190. * The context class is responsible for providing an interface for capturing benchmark metrics to benchmark functions.
  191. */
  192. class context {
  193. bool d_timer_on;
  194. std::chrono::high_resolution_clock::time_point d_start;
  195. std::chrono::nanoseconds d_duration;
  196. std::chrono::seconds d_benchtime;
  197. size_t d_num_iterations;
  198. size_t d_num_threads;
  199. size_t d_num_bytes;
  200. benchmark_info d_benchmark;
  201. public:
  202. context(const benchmark_info& info, const options& opts)
  203. : d_timer_on(false)
  204. , d_start()
  205. , d_duration()
  206. , d_benchtime(std::chrono::seconds(opts.get_benchtime()))
  207. , d_num_iterations(1)
  208. , d_num_threads(opts.get_cpu())
  209. , d_num_bytes(0)
  210. , d_benchmark(info)
  211. {}
  212. size_t num_iterations() const { return d_num_iterations; }
  213. void set_num_threads(size_t n) { d_num_threads = n; }
  214. size_t num_threads() const { return d_num_threads; }
  215. void start_timer() {
  216. if (!d_timer_on) {
  217. d_start = std::chrono::high_resolution_clock::now();
  218. d_timer_on = true;
  219. }
  220. }
  221. void stop_timer() {
  222. if (d_timer_on) {
  223. d_duration += std::chrono::high_resolution_clock::now() - d_start;
  224. d_timer_on = false;
  225. }
  226. }
  227. void reset_timer() {
  228. if (d_timer_on) {
  229. d_start = std::chrono::high_resolution_clock::now();
  230. }
  231. d_duration = std::chrono::nanoseconds::zero();
  232. }
  233. void set_bytes(int64_t bytes) { d_num_bytes = bytes; }
  234. size_t get_ns_per_op() {
  235. if (d_num_iterations <= 0) {
  236. return 0;
  237. }
  238. return d_duration.count() / d_num_iterations;
  239. }
  240. void run_n(size_t n) {
  241. d_num_iterations = n;
  242. reset_timer();
  243. start_timer();
  244. d_benchmark.get_func()(this);
  245. stop_timer();
  246. }
  247. void run_parallel(std::function<void(parallel_context*)> f) {
  248. parallel_context pc(d_num_iterations);
  249. std::vector<std::thread> threads;
  250. for (size_t i = 0; i < d_num_threads; ++i) {
  251. threads.push_back(std::thread([&pc,&f]() -> void {
  252. f(&pc);
  253. }));
  254. }
  255. for(auto& thread : threads){
  256. thread.join();
  257. }
  258. }
  259. result run() {
  260. size_t n = 1;
  261. run_n(n);
  262. while (d_duration < d_benchtime && n < 1e9) {
  263. size_t last = n;
  264. if (get_ns_per_op() == 0) {
  265. n = 1e9;
  266. } else {
  267. n = d_duration.count() / get_ns_per_op();
  268. }
  269. n = std::max(std::min(n+n/2, 100*last), last+1);
  270. n = round_up(n);
  271. run_n(n);
  272. }
  273. return result(n, d_duration, d_num_bytes);
  274. }
  275. private:
  276. template<typename T>
  277. T round_down_10(T n) {
  278. int tens = 0;
  279. while (n > 10) {
  280. n /= 10;
  281. tens++;
  282. }
  283. int result = 1;
  284. for (int i = 0; i < tens; ++i) {
  285. result *= 10;
  286. }
  287. return result;
  288. }
  289. template<typename T>
  290. T round_up(T n) {
  291. T base = round_down_10(n);
  292. if (n < (2 * base)) {
  293. return 2 * base;
  294. }
  295. if (n < (5 * base)) {
  296. return 5 * base;
  297. }
  298. return 10 * base;
  299. }
  300. };
  301. /*
  302. * The run_benchmarks function will run the registered benchmarks.
  303. */
  304. void run_benchmarks(const options& opts) {
  305. std::regex match_r(opts.get_bench());
  306. auto benchmarks = registration::get_ptr()->get_benchmarks();
  307. for (auto& info : benchmarks) {
  308. if (std::regex_match(info.get_name(), match_r)) {
  309. context c(info, opts);
  310. auto r = c.run();
  311. std::cout << std::setw(35) << std::left << info.get_name() << r.to_string() << std::endl;
  312. }
  313. }
  314. }
  315. } // namespace benchpress
  316. /*
  317. * If BENCHPRESS_CONFIG_MAIN is defined when the file is included then a main function will be emitted which provides a
  318. * command-line parser and then executes run_benchmarks.
  319. */
  320. #ifdef BENCHPRESS_CONFIG_MAIN
  321. #include "cxxopts.hpp"
  322. benchpress::registration* benchpress::registration::d_this;
  323. int main(int argc, char** argv) {
  324. std::chrono::high_resolution_clock::time_point bp_start = std::chrono::high_resolution_clock::now();
  325. benchpress::options bench_opts;
  326. try {
  327. cxxopts::Options cmd_opts(argv[0], " - command line options");
  328. cmd_opts.add_options()
  329. ("bench", "run benchmarks matching the regular expression", cxxopts::value<std::string>()
  330. ->default_value(".*"))
  331. ("benchtime", "run enough iterations of each benchmark to take t seconds", cxxopts::value<size_t>()
  332. ->default_value("1"))
  333. ("cpu", "specify the number of threads to use for parallel benchmarks", cxxopts::value<size_t>()
  334. ->default_value(std::to_string(std::thread::hardware_concurrency())))
  335. ("help", "print help")
  336. ;
  337. cmd_opts.parse(argc, argv);
  338. if (cmd_opts.count("help")) {
  339. std::cout << cmd_opts.help({""}) << std::endl;
  340. exit(0);
  341. }
  342. if (cmd_opts.count("bench")) {
  343. bench_opts.bench(cmd_opts["bench"].as<std::string>());
  344. }
  345. if (cmd_opts.count("benchtime")) {
  346. bench_opts.benchtime(cmd_opts["benchtime"].as<size_t>());
  347. }
  348. if (cmd_opts.count("cpu")) {
  349. bench_opts.cpu(cmd_opts["cpu"].as<size_t>());
  350. }
  351. } catch (const cxxopts::OptionException& e) {
  352. std::cout << "error parsing options: " << e.what() << std::endl;
  353. exit(1);
  354. }
  355. benchpress::run_benchmarks(bench_opts);
  356. float duration = std::chrono::duration_cast<std::chrono::milliseconds>(
  357. std::chrono::high_resolution_clock::now() - bp_start
  358. ).count() / 1000.f;
  359. std::cout << argv[0] << " " << duration << "s" << std::endl;
  360. return 0;
  361. }
  362. #endif
  363. #endif // BENCHPRESS_HPP