/*
 * Copyright (C) 2015 Christopher Gilbert.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef BENCHPRESS_HPP
#define BENCHPRESS_HPP

#include <algorithm>   // max, min
#include <atomic>      // atomic_intmax_t
#include <chrono>      // high_resolution_clock, duration
#include <functional>  // function
#include <iomanip>     // setw
#include <iostream>    // cout
#include <regex>       // regex, regex_match
#include <sstream>     // stringstream
#include <string>      // string
#include <thread>      // thread
#include <vector>      // vector

namespace benchpress {

/*
 * The options class encapsulates all options for running benchmarks.
 *
 * When including benchpress, a main function can be emitted which includes a command-line parser for building an
 * options object. However, from time to time it may be necessary for the developer to build their own main stub
 * and construct the options object manually.
 *
 * options opts;
 * opts
 *     .bench(".*")
 *     .benchtime(1)
 *     .cpu(4);
 */
class options {
    std::string d_bench;
    size_t      d_benchtime;
    size_t      d_cpu;
public:
    options()
        : d_bench(".*")
        , d_benchtime(1)
        , d_cpu(std::thread::hardware_concurrency())
    {}
    options& bench(const std::string& bench) {
        d_bench = bench;
        return *this;
    }
    options& benchtime(size_t benchtime) {
        d_benchtime = benchtime;
        return *this;
    }
    options& cpu(size_t cpu) {
        d_cpu = cpu;
        return *this;
    }
    std::string get_bench() const {
        return d_bench;
    }
    size_t get_benchtime() const {
        return d_benchtime;
    }
    size_t get_cpu() const {
        return d_cpu;
    }
};

class context;

/*
 * The benchmark_info class is used to store a benchmark name / function pair.
 *
 * benchmark_info bi("example", [](benchpress::context* b) {
 *     // benchmark function
 * });
 */
class benchmark_info {
    std::string                   d_name;
    std::function<void(context*)> d_func;

public:
    benchmark_info(std::string name, std::function<void(context*)> func)
        : d_name(name)
        , d_func(func)
    {}

    std::string                   get_name() const { return d_name; }
    std::function<void(context*)> get_func() const { return d_func; }
};

/*
 * The registration class is responsible for providing a single global point of reference for registering
 * benchmark functions.
 *
 * registration::get_ptr()->register_benchmark(info);
 */
class registration {
    static registration*        d_this;
    std::vector<benchmark_info> d_benchmarks;

public:
    static registration* get_ptr() {
        if (nullptr == d_this) {
            d_this = new registration();
        }
        return d_this;
    }

    void register_benchmark(benchmark_info& info) {
        d_benchmarks.push_back(info);
    }

    std::vector<benchmark_info> get_benchmarks() { return d_benchmarks; }
};

/*
 * The auto_register class is a helper used to register benchmarks.
 */
class auto_register {
public:
    auto_register(const std::string& name, std::function<void(context*)> func) {
        benchmark_info info(name, func);
        registration::get_ptr()->register_benchmark(info);
    }
};

#define CONCAT(x, y) x ## y
#define CONCAT2(x, y) CONCAT(x, y)

// The BENCHMARK macro is a helper for creating benchmark functions and automatically registering them with the
// registration class.
#define BENCHMARK(x, f) benchpress::auto_register CONCAT2(register_, __LINE__)((x), (f));

// This macro will prevent the compiler from removing a redundant code path which has no side-effects.
#define DISABLE_REDUNDANT_CODE_OPT() { asm(""); }

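// Usage sketch (illustrative only, not part of the library): in one translation unit, define
// BENCHPRESS_CONFIG_MAIN before including this header to emit a main(), then register benchmarks at namespace
// scope with the BENCHMARK macro. The benchmark body should perform its work ctx->num_iterations() times, since
// the framework chooses the iteration count. The benchmark name below is a placeholder.
//
//     #define BENCHPRESS_CONFIG_MAIN
//     #include "benchpress.hpp"
//
//     BENCHMARK("string append", [](benchpress::context* ctx) {
//         for (size_t i = 0; i < ctx->num_iterations(); ++i) {
//             std::string s("hello");
//             s += " world";
//             DISABLE_REDUNDANT_CODE_OPT();
//         }
//     })
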
/*
 * The result class is responsible for producing a printable string representation of a benchmark run.
 */
class result {
    size_t                   d_num_iterations;
    std::chrono::nanoseconds d_duration;
    size_t                   d_num_bytes;

public:
    result(size_t num_iterations, std::chrono::nanoseconds duration, size_t num_bytes)
        : d_num_iterations(num_iterations)
        , d_duration(duration)
        , d_num_bytes(num_bytes)
    {}

    size_t get_ns_per_op() const {
        if (d_num_iterations <= 0) {
            return 0;
        }
        return d_duration.count() / d_num_iterations;
    }

    double get_mb_per_s() const {
        if (d_num_iterations <= 0 || d_duration.count() <= 0 || d_num_bytes <= 0) {
            return 0;
        }
        return ((double(d_num_bytes) * double(d_num_iterations) / double(1e6)) /
                double(std::chrono::duration_cast<std::chrono::seconds>(d_duration).count()));
    }

    std::string to_string() const {
        std::stringstream tmp;
        tmp << std::setw(12) << std::right << d_num_iterations;
        size_t npo = get_ns_per_op();
        tmp << std::setw(12) << std::right << npo << std::setw(0) << " ns/op";
        double mbs = get_mb_per_s();
        if (mbs > 0.0) {
            tmp << std::setw(12) << std::right << mbs << std::setw(0) << " MB/s";
        }
        return std::string(tmp.str());
    }
};

/*
 * The parallel_context class is responsible for providing a thread-safe context for parallel benchmark code.
 */
class parallel_context {
    std::atomic_intmax_t d_num_iterations;
public:
    parallel_context(size_t num_iterations)
        : d_num_iterations(num_iterations)
    {}

    bool next() {
        return (d_num_iterations.fetch_sub(1) > 0);
    }
};

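// Parallel usage sketch (illustrative only, not part of the library): a benchmark can hand its work out to the
// threads started by context::run_parallel() below; each thread claims iterations from the shared
// parallel_context until next() returns false. The benchmark name and the atomic counter are placeholders.
//
//     BENCHMARK("parallel counter", [](benchpress::context* ctx) {
//         std::atomic<size_t> total(0);
//         ctx->run_parallel([&total](benchpress::parallel_context* pctx) {
//             while (pctx->next()) {
//                 total.fetch_add(1, std::memory_order_relaxed);
//             }
//         });
//     })
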
/*
 * The context class is responsible for providing an interface for capturing benchmark metrics to benchmark functions.
 */
class context {
    bool                                           d_timer_on;
    std::chrono::high_resolution_clock::time_point d_start;
    std::chrono::nanoseconds                       d_duration;
    std::chrono::seconds                           d_benchtime;
    size_t                                         d_num_iterations;
    size_t                                         d_num_threads;
    size_t                                         d_num_bytes;
    benchmark_info                                 d_benchmark;

public:
    context(const benchmark_info& info, const options& opts)
        : d_timer_on(false)
        , d_start()
        , d_duration()
        , d_benchtime(std::chrono::seconds(opts.get_benchtime()))
        , d_num_iterations(1)
        , d_num_threads(opts.get_cpu())
        , d_num_bytes(0)
        , d_benchmark(info)
    {}

    size_t num_iterations() const { return d_num_iterations; }

    void set_num_threads(size_t n) { d_num_threads = n; }
    size_t num_threads() const { return d_num_threads; }

    void start_timer() {
        if (!d_timer_on) {
            d_start = std::chrono::high_resolution_clock::now();
            d_timer_on = true;
        }
    }
    void stop_timer() {
        if (d_timer_on) {
            d_duration += std::chrono::high_resolution_clock::now() - d_start;
            d_timer_on = false;
        }
    }
    void reset_timer() {
        if (d_timer_on) {
            d_start = std::chrono::high_resolution_clock::now();
        }
        d_duration = std::chrono::nanoseconds::zero();
    }

    void set_bytes(int64_t bytes) { d_num_bytes = bytes; }

    size_t get_ns_per_op() {
        if (d_num_iterations <= 0) {
            return 0;
        }
        return d_duration.count() / d_num_iterations;
    }

    // Runs the benchmark function once with the iteration count set to n, timing the whole invocation.
    void run_n(size_t n) {
        d_num_iterations = n;
        reset_timer();
        start_timer();
        d_benchmark.get_func()(this);
        stop_timer();
    }

    void run_parallel(std::function<void(parallel_context*)> f) {
        parallel_context pc(d_num_iterations);
        std::vector<std::thread> threads;
        for (size_t i = 0; i < d_num_threads; ++i) {
            threads.push_back(std::thread([&pc, &f]() -> void {
                f(&pc);
            }));
        }
        for (auto& thread : threads) {
            thread.join();
        }
    }

    result run() {
        // Start with a single iteration, then keep growing the iteration count (roughly 1.5x per step, capped at
        // 100x and rounded up to a "nice" number) until the accumulated duration reaches the requested benchtime.
        size_t n = 1;
        run_n(n);
        while (d_duration < d_benchtime && n < 1e9) {
            size_t last = n;
            if (get_ns_per_op() == 0) {
                n = 1e9;
            } else {
                n = d_duration.count() / get_ns_per_op();
            }
            n = std::max(std::min(n + n / 2, 100 * last), last + 1);
            n = round_up(n);
            run_n(n);
        }
        return result(n, d_duration, d_num_bytes);
    }

private:
    template<typename T>
    T round_down_10(T n) {
        int tens = 0;
        while (n > 10) {
            n /= 10;
            tens++;
        }
        int result = 1;
        for (int i = 0; i < tens; ++i) {
            result *= 10;
        }
        return result;
    }

    // Rounds n up to the next value of the form 2, 5, or 10 times a power of ten.
    template<typename T>
    T round_up(T n) {
        T base = round_down_10(n);
        if (n < (2 * base)) {
            return 2 * base;
        }
        if (n < (5 * base)) {
            return 5 * base;
        }
        return 10 * base;
    }
};

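// Timer and throughput sketch (illustrative only, not part of the library): expensive setup can be excluded from
// the measurement with reset_timer() (or stop_timer()/start_timer()), and set_bytes() reports the bytes processed
// per iteration so that the MB/s column is emitted. The buffer size and benchmark name are placeholders.
//
//     BENCHMARK("copy 1MiB", [](benchpress::context* ctx) {
//         std::vector<char> src(1024 * 1024, 'x');
//         std::vector<char> dst(1024 * 1024);
//         ctx->reset_timer();                          // exclude the setup above from the measurement
//         for (size_t i = 0; i < ctx->num_iterations(); ++i) {
//             std::copy(src.begin(), src.end(), dst.begin());
//         }
//         ctx->set_bytes(src.size());
//     })
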
/*
 * The run_benchmarks function will run the registered benchmarks.
 */
void run_benchmarks(const options& opts) {
    std::regex match_r(opts.get_bench());
    auto benchmarks = registration::get_ptr()->get_benchmarks();
    for (auto& info : benchmarks) {
        if (std::regex_match(info.get_name(), match_r)) {
            context c(info, opts);
            auto r = c.run();
            std::cout << std::setw(35) << std::left << info.get_name() << r.to_string() << std::endl;
        }
    }
}

} // namespace benchpress

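// Manual driver sketch (illustrative only, not part of the library): when BENCHPRESS_CONFIG_MAIN below is not
// used, a hand-written main can build an options object and call run_benchmarks directly; the registration
// static member must then be defined by hand, since the definition below is only emitted under the #ifdef.
// The values passed to bench()/benchtime()/cpu() are placeholders.
//
//     // main.cpp
//     #include "benchpress.hpp"
//     benchpress::registration* benchpress::registration::d_this;
//
//     int main() {
//         benchpress::options opts;
//         opts.bench(".*").benchtime(3).cpu(2);
//         benchpress::run_benchmarks(opts);
//         return 0;
//     }
//
// Each matching benchmark prints one line: the name left-aligned in a 35-character column, then the iteration
// count, the ns/op figure, and (when set_bytes() was called) an MB/s figure.
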
/*
 * If BENCHPRESS_CONFIG_MAIN is defined when the file is included then a main function will be emitted which provides a
 * command-line parser and then executes run_benchmarks.
 */
#ifdef BENCHPRESS_CONFIG_MAIN
#include "cxxopts.hpp"
benchpress::registration* benchpress::registration::d_this;
int main(int argc, char** argv) {
    std::chrono::high_resolution_clock::time_point bp_start = std::chrono::high_resolution_clock::now();
    benchpress::options bench_opts;
    try {
        cxxopts::Options cmd_opts(argv[0], " - command line options");
        cmd_opts.add_options()
            ("bench", "run benchmarks matching the regular expression", cxxopts::value<std::string>()
                ->default_value(".*"))
            ("benchtime", "run enough iterations of each benchmark to take t seconds", cxxopts::value<size_t>()
                ->default_value("1"))
            ("cpu", "specify the number of threads to use for parallel benchmarks", cxxopts::value<size_t>()
                ->default_value(std::to_string(std::thread::hardware_concurrency())))
            ("help", "print help")
        ;
        cmd_opts.parse(argc, argv);
        if (cmd_opts.count("help")) {
            std::cout << cmd_opts.help({""}) << std::endl;
            exit(0);
        }
        if (cmd_opts.count("bench")) {
            bench_opts.bench(cmd_opts["bench"].as<std::string>());
        }
        if (cmd_opts.count("benchtime")) {
            bench_opts.benchtime(cmd_opts["benchtime"].as<size_t>());
        }
        if (cmd_opts.count("cpu")) {
            bench_opts.cpu(cmd_opts["cpu"].as<size_t>());
        }
    } catch (const cxxopts::OptionException& e) {
        std::cout << "error parsing options: " << e.what() << std::endl;
        exit(1);
    }
    benchpress::run_benchmarks(bench_opts);
    float duration = std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::high_resolution_clock::now() - bp_start
    ).count() / 1000.f;
    std::cout << argv[0] << " " << duration << "s" << std::endl;
    return 0;
}
#endif

#endif // BENCHPRESS_HPP