dump1090-fa/starch/templates/benchmark.c.template

### Copyright (c) 2020, FlightAware LLC.
### All rights reserved.
### See the LICENSE file for licensing terms.

/* starch generated code. Do not edit. */

#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <stdalign.h>
#include <stdbool.h>
#include <string.h>
#include <inttypes.h>
#include <time.h>
#include <unistd.h>
#include <errno.h>

#include "${os.path.relpath(gen.generated_include_path, current_dir)}"

/* Timestamp type used by the timing helpers in this file. */
typedef struct timespec starch_benchmark_time;

/* Prototypes for the non-static helper functions defined further down in this file. */
uint64_t starch_benchmark_elapsed(starch_benchmark_time *start, starch_benchmark_time *end);
void *starch_benchmark_aligned_alloc(size_t alignment, size_t type_alignment, size_t size);
void starch_benchmark_aligned_free(void *user_ptr);
void starch_benchmark_get_time(starch_benchmark_time *t);

/* Number of untimed calls made to each implementation before it is verified and timed. */
const unsigned starch_benchmark_warmup_loops = 10;

/* One completed measurement; these records are what the wisdom output is built from. */
typedef struct {
    const char *name; /* name of the benchmarked function */
    const char *impl; /* name of the registry entry (implementation) that was timed */
    uint64_t ns; /* measured time per call, in nanoseconds */
} starch_benchmark_result;

/* Growable array of results: _size is the allocated capacity, _count the number of entries in use. */
static starch_benchmark_result *starch_benchmark_results = NULL;
static unsigned starch_benchmark_result_size = 0;
static unsigned starch_benchmark_result_count = 0;

/* Singly linked list of flavor names; a NULL pointer is the empty list. */
typedef struct benchmark_flavor_list_node {
    const char *flavor; /* flavor name; not copied, the node only borrows the pointer */
    struct benchmark_flavor_list_node *next; /* next node, or NULL at the end of the list */
} starch_benchmark_flavor_list;

/* Flavors named with -F (whitelist) and -N (blacklist); both lists are empty by default. */
static starch_benchmark_flavor_list *starch_benchmark_flavor_whitelist = NULL;
static starch_benchmark_flavor_list *starch_benchmark_flavor_blacklist = NULL;

/* Run-time options, set from the command line in main(). */
static bool starch_benchmark_list_only = false; /* -l: report support status only, run nothing */
static bool starch_benchmark_validate_only = false; /* -V: stop after the verification step */
static bool starch_benchmark_validation_failed = false; /* set when any verifier fails; makes main() return 1 */
static bool starch_benchmark_top_only = false; /* -t: write only the fastest entry per function */
static unsigned starch_benchmark_iterations = 1; /* -i: number of timed runs per implementation */

typedef struct timespec starch_benchmark_time;

/*
 * Store the current reading of the benchmark clock in *t.
 *
 * The per-thread CPU-time clock is used when the platform headers define
 * CLOCK_THREAD_CPUTIME_ID; otherwise the monotonic clock is used instead.
 * The return value of clock_gettime() is not checked.
 */
void starch_benchmark_get_time(starch_benchmark_time *t)
{
#ifndef CLOCK_THREAD_CPUTIME_ID
    /* no per-thread CPU clock on this platform */
    clock_gettime(CLOCK_MONOTONIC, t);
#else
    clock_gettime(CLOCK_THREAD_CPUTIME_ID, t);
#endif
}

/*
 * Return the number of nanoseconds from *start to *end.
 *
 * All arithmetic is done in uint64_t, so the intermediate steps wrap
 * modularly; the result is meaningful when *end is not earlier than *start.
 */
uint64_t starch_benchmark_elapsed(starch_benchmark_time *start, starch_benchmark_time *end)
{
    const uint64_t ns_per_second = 1000000000U;

    uint64_t total = ((uint64_t)end->tv_sec - (uint64_t)start->tv_sec) * ns_per_second;
    total += (uint64_t)end->tv_nsec;
    total -= (uint64_t)start->tv_nsec;
    return total;
}

/*
 * Allocate `size` bytes aligned to the stricter of `alignment` and
 * `type_alignment`, but deliberately NOT aligned to twice that boundary.
 *
 * alignment       alignment requested by the benchmark configuration
 * type_alignment  natural alignment of the element type
 * size            number of usable bytes required
 *
 * Returns a pointer that must be released with starch_benchmark_aligned_free(),
 * or NULL (after printing a diagnostic to stderr) if the alignments are zero or
 * incompatible, the request is too large, or the underlying allocation fails.
 */
void *starch_benchmark_aligned_alloc(size_t alignment, size_t type_alignment, size_t size)
{
    /* a zero alignment is meaningless and would divide by zero in the checks below */
    if (alignment == 0 || type_alignment == 0) {
        fprintf(stderr, "STARCH_BENCHMARK_ALLOC of %zu bytes failed: invalid zero alignment (%zu versus %zu)\n", size, alignment, type_alignment);
        return NULL;
    }

    size_t use_alignment = (type_alignment > alignment ? type_alignment : alignment);
    if ( (use_alignment % type_alignment) || (use_alignment % alignment) ) {
        fprintf(stderr, "STARCH_BENCHMARK_ALLOC of %zu bytes failed: conflicting alignment requirements (%zu versus %zu)\n", size, alignment, type_alignment);
        return NULL;
    }

    /* keep every size computation below (and use_alignment * 2) free of wraparound */
    if (use_alignment > SIZE_MAX / 4 - sizeof(void*)) {
        fprintf(stderr, "STARCH_BENCHMARK_ALLOC of %zu bytes failed: alignment %zu is too large\n", size, use_alignment);
        return NULL;
    }

    /* Over-allocate so we can stash our own pointer before the start, and so that we can adjust
     * the returned alignment so it is only aligned to the requested boundary, and not also
     * aligned to a larger power of two (we don't want to accidentally benchmark the performance
     * of a more restrictive larger alignment)
     */
    size_t header_size = (use_alignment < sizeof(void*) ? sizeof(void*) : use_alignment);
    size_t overhead = header_size + use_alignment;
    if (size > SIZE_MAX - overhead - use_alignment) {
        fprintf(stderr, "STARCH_BENCHMARK_ALLOC of %zu bytes failed: requested size is too large\n", size);
        return NULL;
    }

    /* C11 requires the size passed to aligned_alloc to be a multiple of the alignment */
    size_t block_size = overhead + size;
    size_t excess = block_size % use_alignment;
    if (excess)
        block_size += use_alignment - excess;

    char *block_ptr = aligned_alloc(use_alignment, block_size);
    if (!block_ptr) {
        fprintf(stderr, "STARCH_BENCHMARK_ALLOC of %zu bytes failed: %s\n", size, strerror(errno));
        return NULL;
    }

    char *user_ptr = block_ptr + header_size;
    if ( (uintptr_t)user_ptr % (use_alignment * 2) == 0 ) {
        // user_ptr is aligned to the next power of two, but we don't want that, move it on
        user_ptr += use_alignment;
    }

    /* Stash the block pointer in the bytes immediately before user_ptr. When use_alignment is
     * smaller than a pointer, that location may not be pointer-aligned, so copy the bytes
     * rather than storing through a void** lvalue.
     */
    void *stash = block_ptr;
    memcpy(user_ptr - sizeof(stash), &stash, sizeof(stash));

    return user_ptr;
}

/*
 * Release a buffer obtained from starch_benchmark_aligned_alloc().
 *
 * user_ptr  pointer previously returned by the allocator, or NULL (no-op)
 *
 * The address of the underlying block is stored in the sizeof(void*) bytes
 * immediately before user_ptr. Buffers with an alignment smaller than a
 * pointer are deliberately offset, so that location may be misaligned for a
 * void*; the value is therefore copied out bytewise instead of being loaded
 * through a void** lvalue.
 */
void starch_benchmark_aligned_free(void *user_ptr)
{
    if (!user_ptr)
        return;

    void *block_ptr;
    memcpy(&block_ptr, (char *)user_ptr - sizeof(block_ptr), sizeof(block_ptr));
    free(block_ptr);
}

/*
 * Report whether `flavor` matches the name stored in any node of `list`.
 * An empty (NULL) list never matches.
 */
static bool starch_benchmark_flavor_in_list(const char *flavor, const starch_benchmark_flavor_list *list)
{
    const starch_benchmark_flavor_list *node = list;
    while (node != NULL) {
        if (strcmp(node->flavor, flavor) == 0)
            return true;
        node = node->next;
    }
    return false;
}

<% functions_to_benchmark = [f for f in gen.functions.values() if f.benchmark] %>
% for function in sorted(functions_to_benchmark):
/* Prototypes for the benchmark helpers supplied by user code: the benchmark
 * entry point and, when the function has a verifier configured, the verifier
 * that is given the same arguments as the function under test.
 */
void ${function.benchmark_symbol} (void);
% if function.benchmark_verify:
bool ${function.benchmark_verify_symbol} ( ${function.declaration_arglist } );
% endif

/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
void ${gen.symbol_prefix}${function.name}_benchmark(void);

/*
 * Benchmark a single registry entry of this function.
 *
 * Prints the entry name followed by its outcome on stderr. The entry is
 * skipped when its flavor is unsupported at runtime, filtered out by the
 * whitelist or blacklist, or when only a listing was requested. Otherwise it
 * is warmed up, verified (when a verifier exists), calibrated and timed, and
 * the per-call time is appended to the global result array.
 *
 * _entry           registry entry to exercise
 * remaining args   passed unchanged to the implementation and the verifier
 */
static void starch_benchmark_one_${function.name}( ${function.regentry_type} * _entry, ${function.declaration_arglist } )
{
    fprintf(stderr, "  %-40s  ", _entry->name);

    /* test for support */
    if (_entry->flavor_supported && !(_entry->flavor_supported())) {
        fprintf(stderr, "unsupported\n");
        return;
    }

    if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
        fprintf(stderr, "skipped (not whitelisted)\n");
        return;
    }

    if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
        fprintf(stderr, "skipped (blacklisted)\n");
        return;
    }

    if (starch_benchmark_list_only) {
        fprintf(stderr, "supported\n");
        return;
    }

    /* initial warmup */
    for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
        _entry->callable ( ${function.named_arglist} );

    % if function.benchmark_verify:
    /* verify correctness of the output */
    if (! ${function.benchmark_verify_symbol} ( ${function.named_arglist} )) {
        fprintf(stderr, "skipped (verification failed)\n");
        starch_benchmark_validation_failed = true;
        return;
    }
    if (starch_benchmark_validate_only) {
        fprintf(stderr, "validation ok\n");
        return;
    }
    % else:
    if (starch_benchmark_validate_only) {
        fprintf(stderr, "no validator defined\n");
        return;
    }
    % endif

    /* pre-benchmark, find a loop count that takes at least 100ms */
    starch_benchmark_time _start, _end;
    uint64_t _elapsed = 0;
    uint64_t _loops = 127;
    while (_elapsed < 100000000) {
        _loops *= 2;
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( ${function.named_arglist} );
        starch_benchmark_get_time(&_end);
        _elapsed = starch_benchmark_elapsed(&_start, &_end);
    }

    /* real benchmark, run for approx 1 second */
    _loops = _loops * 1000000000 / _elapsed;
    if (_loops == 0) {
        /* calibration took longer than one second per call; the scaled count
         * truncated to zero, which would divide by zero below */
        _loops = 1;
    }

    /* a zero iteration count would time nothing and then divide by zero */
    const unsigned _iterations = (starch_benchmark_iterations > 0 ? starch_benchmark_iterations : 1);

    _elapsed = 0;
    uint64_t _elapsed_min = UINT64_MAX;
    uint64_t _elapsed_max = 0;
    for (unsigned _iter = 0; _iter < _iterations; ++_iter) {
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( ${function.named_arglist} );
        starch_benchmark_get_time(&_end);
        uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
        if (_elapsed_one < _elapsed_min)
            _elapsed_min = _elapsed_one;
        if (_elapsed_one > _elapsed_max)
            _elapsed_max = _elapsed_one;
        _elapsed += _elapsed_one;
    }

    /* mean time per call; with more than two runs the fastest and slowest run are discarded */
    uint64_t _per_loop;
    if (_iterations > 2)
        _per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (_iterations - 2);
    else
        _per_loop = _elapsed / _loops / _iterations;

    fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);

    /* grow the result array (64 entries initially, then doubling) when it is full */
    if (starch_benchmark_result_count >= starch_benchmark_result_size) {
        if (!starch_benchmark_result_size)
            starch_benchmark_result_size = 64;
        else
            starch_benchmark_result_size *= 2;
        starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
        if (!starch_benchmark_results) {
            fprintf(stderr, "realloc: %s\n", strerror(errno));
            exit(1);
        }
    }

    starch_benchmark_results[starch_benchmark_result_count].name = "${function.name}";
    starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
    starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
    ++starch_benchmark_result_count;
}

/*
 * Benchmark every entry in this function's registry, in registry order.
 * The registry is walked until an entry with a NULL name is reached; the
 * arguments are forwarded unchanged to each per-entry benchmark.
 */
static void starch_benchmark_run_${function.name}( ${function.declaration_arglist } )
{
    ${function.regentry_type} *_entry = ${function.registry_symbol};
    while (_entry->name) {
        starch_benchmark_one_${function.name}( _entry, ${function.named_arglist} );
        ++_entry;
    }
}

% endfor

/*
 * First pass over the benchmark sources: the unaligned variants.
 *
 * The macros below are in effect while every benchmark source file is
 * textually included. Buffers obtained through STARCH_BENCHMARK_ALLOC are
 * requested with an alignment of 1 (on top of the element type's own
 * alignment) and STARCH_ALIGNED passes its pointer through unchanged.
 * NOTE(review): STARCH_IMPL expands to a "_dummy_benchmark" name, presumably
 * so that implementation definitions seen in the included files do not clash
 * with the real ones; confirm against the benchmark sources.
 */
#undef STARCH_ALIGNMENT

#define STARCH_ALIGNMENT 1
#define STARCH_ALIGNED(_ptr) (_ptr)
#define STARCH_SYMBOL(_name) ${gen.symbol_prefix} ## _name ## _benchmark_sym
#define STARCH_IMPL(_function,_impl) ${gen.symbol_prefix} ## _function ## _ ## _impl ## _dummy_benchmark
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#define STARCH_BENCHMARK(_function) ${gen.symbol_prefix} ## _function ## _benchmark
#define STARCH_BENCHMARK_VERIFY(_function) ${gen.symbol_prefix} ## _function ## _benchmark_verify
#define STARCH_BENCHMARK_RUN(_function, ...) starch_benchmark_run_ ## _function ( __VA_ARGS__ )
#define STARCH_BENCHMARK_ALLOC(_count, _type) ((_type *) starch_benchmark_aligned_alloc(1, alignof(_type), (_count) * sizeof(_type)))
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)

% for source in sorted(gen.benchmark_files):
#include "${os.path.relpath(source.path, current_dir)}"
% endfor

/*
 * Second pass over the benchmark sources: the aligned variants.
 *
 * The macros from the first pass are removed and redefined so that generated
 * names carry an "_aligned" component, buffers are allocated with
 * STARCH_MIX_ALIGNMENT, and STARCH_ALIGNED tells the compiler that the pointer
 * has that alignment. Only benchmark sources that serve at least one function
 * marked as aligned are included again.
 */
#undef STARCH_ALIGNMENT
#undef STARCH_ALIGNED
#undef STARCH_SYMBOL
#undef STARCH_IMPL
#undef STARCH_IMPL_REQUIRES
#undef STARCH_BENCHMARK
#undef STARCH_BENCHMARK_VERIFY
#undef STARCH_BENCHMARK_RUN
#undef STARCH_BENCHMARK_ALLOC
#undef STARCH_BENCHMARK_FREE

#define STARCH_ALIGNMENT STARCH_MIX_ALIGNMENT
#define STARCH_ALIGNED(_ptr) (__builtin_assume_aligned((_ptr), STARCH_MIX_ALIGNMENT))
#define STARCH_SYMBOL(_name) ${gen.symbol_prefix} ## _name ## _aligned_benchmark_sym
#define STARCH_IMPL(_function,_impl) ${gen.symbol_prefix} ## _function ## _aligned_ ## _impl ## _dummy_benchmark
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#define STARCH_BENCHMARK(_function) ${gen.symbol_prefix} ## _function ## _aligned_benchmark
#define STARCH_BENCHMARK_VERIFY(_function) ${gen.symbol_prefix} ## _function ## _aligned_benchmark_verify
#define STARCH_BENCHMARK_RUN(_function, ...) starch_benchmark_run_ ## _function ## _aligned ( __VA_ARGS__ )
#define STARCH_BENCHMARK_ALLOC(_count, _type) ((_type *) starch_benchmark_aligned_alloc(STARCH_MIX_ALIGNMENT, alignof(_type), (_count) * sizeof(_type)))
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)

% for source in sorted(gen.benchmark_files):
  % if any( (function.aligned and function.benchmark == source) for function in gen.functions.values() ):
#include "${os.path.relpath(source.path, current_dir)}"
  % endif
% endfor

% for function in sorted(functions_to_benchmark):
/* Print the section header for this function on stderr, then call its
 * user-provided benchmark entry point.
 */
static void starch_benchmark_all_${function.name}(void)
{
    fputs("==== ${function.name} ===\n", stderr);
    ${gen.symbol_prefix}${function.name}_benchmark ();
}
% endfor

/*
 * qsort() comparator for starch_benchmark_result records.
 *
 * Orders by function name first (strcmp order); records with the same name
 * are ordered by ascending time per call, so the fastest implementation of
 * each function comes first.
 */
static int starch_benchmark_compare_result(const void *a, const void *b)
{
    const starch_benchmark_result *lhs = a;
    const starch_benchmark_result *rhs = b;

    const int by_name = strcmp(lhs->name, rhs->name);
    if (by_name != 0)
        return by_name;

    /* yields -1, 0 or 1 without subtracting the unsigned timings */
    return (lhs->ns > rhs->ns) - (lhs->ns < rhs->ns);
}

static void starch_benchmark_usage(const char *argv0)
{
    fprintf(stderr,
        "Usage: %s [OPTION]... [FUNCTION]...\n"
        "Benchmarks starch functions and optionally writes a sorted wisdom file.\n"
        "\n"
        "  -r FILE          Read initial wisdom from FILE\n"
        "  -o FILE          Write sorted wisdom to FILE\n"
        "  -F FLAVOR        Add FLAVOR to whitelist\n"
        "                     (default: no whitelist, run all runtime-supported flavors)\n"
        "  -N FLAVOR        Add FLAVOR to blacklist\n"
        "                     (default: no blacklist, run all runtime-supported flavors)\n"
        "  -l               List compiled-in implementations but don't benchmark them\n"
        "  -V               Run validation tests, but don't run benchmarks\n"
        "  -t               Include only the top candidate per function in wisdom output\n"
        "  -i ITERS         Run benchmark ITERS times and use the mean. If ITERS > 2, ignore\n"
        "                   the smallest and largest runs when calculating the mean.\n"
        "                     (default: 1 iteration)\n"
        "  FUNCTION         Run benchmarks for these functions only\n"
        "                     (default: benchmark all functions)\n"
        "\n"
        "Supported flavors:   "
% for flavor in sorted(gen.flavors.values()):
#ifdef ${flavor.macro}
          "${flavor.name} "
#endif
% endfor
          "\n"
        "Supported functions: "
% for function in sorted(gen.functions.values()):
          "${function.name} "
% endfor
          "\n", argv0);
}

/*
 * Add `flavor` to the list that `*list` points at.
 *
 * The new node is linked in at the head of the list and stores the `flavor`
 * pointer itself (the string is not copied). If the node cannot be allocated,
 * an error is printed and the program exits with status 1.
 */
static void starch_benchmark_append_flavor(const char *flavor, starch_benchmark_flavor_list **list)
{
    starch_benchmark_flavor_list *node = malloc(sizeof *node);
    if (node == NULL) {
        fprintf(stderr, "malloc: %s\n", strerror(errno));
        exit(1);
    }

    *node = (starch_benchmark_flavor_list){ .flavor = flavor, .next = *list };
    *list = node;
}

/*
 * Benchmark driver entry point.
 *
 * Parses the command-line options, runs the benchmarks for the functions
 * named on the command line (or for every function when none is named) and,
 * when -o was given, writes the collected results as a sorted wisdom file.
 *
 * Exit status: 0 on success; 1 if a wisdom file could not be read or written
 * or if any verification failed; 2 on a usage error.
 */
int main(int argc, char **argv)
{
    int specific = 0;
    const char *output_path = NULL;

    int opt;
    while ((opt = getopt(argc, argv, "r:o:F:N:i:lhtV")) != -1) {
        switch (opt) {
        case 'r':
            if (${gen.sym("read_wisdom")}(optarg) < 0) {
                fprintf(stderr, "%s: cannot read %s: %s\n", argv[0], optarg, strerror(errno));
                return 1;
            }
            fprintf(stderr, "%s: loaded wisdom file %s\n", argv[0], optarg);
            break;

        case 'o':
            output_path = optarg;
            break;

        case 'F':
            if (starch_benchmark_flavor_in_list(optarg, starch_benchmark_flavor_blacklist)) {
                fprintf(stderr, "%s: conflicting -F and -N options for flavor %s\n", argv[0], optarg);
                return 2;
            }
            starch_benchmark_append_flavor(optarg, &starch_benchmark_flavor_whitelist);
            break;

        case 'N':
            if (starch_benchmark_flavor_in_list(optarg, starch_benchmark_flavor_whitelist)) {
                fprintf(stderr, "%s: conflicting -F and -N options for flavor %s\n", argv[0], optarg);
                return 2;
            }
            starch_benchmark_append_flavor(optarg, &starch_benchmark_flavor_blacklist);
            break;

        case 'l':
            starch_benchmark_list_only = true;
            break;

        case 't':
            starch_benchmark_top_only = true;
            break;

        case 'i': {
            /* The count is used as a divisor when averaging, so it must be a
             * positive number that fits in an unsigned; reject anything else
             * instead of silently accepting 0, negatives or trailing junk.
             */
            char *end = NULL;
            errno = 0;
            long iterations = strtol(optarg, &end, 10);
            if (end == optarg || *end != '\0' || errno == ERANGE
                || iterations < 1 || (unsigned long) iterations > (unsigned) -1) {
                fprintf(stderr, "%s: invalid iteration count: %s\n", argv[0], optarg);
                return 2;
            }
            starch_benchmark_iterations = (unsigned) iterations;
            break;
        }

        case 'V':
            starch_benchmark_validate_only = true;
            break;

        case 'h':
            starch_benchmark_usage(argv[0]);
            return 0;

        case '?':
        default:
            starch_benchmark_usage(argv[0]);
            return 2;
        }
    }

    if (starch_benchmark_list_only && output_path) {
        fprintf(stderr, "%s: -o and -l options cannot be specified together\n", argv[0]);
        return 2;
    }

    /* run the functions named on the command line, in the order given */
    for (int i = optind; i < argc; ++i) {
% for function in sorted(gen.functions.values()):
        if (!strcmp(argv[i], "${function.name}")) {
            specific = 1;
% if function.benchmark:
            starch_benchmark_all_${function.name}();
% else:
            fprintf(stderr, "=== ${function.name} ===\n");
            fprintf(stderr, "  (no benchmark support defined)\n");
% endif
            continue;
        }
% endfor

        fprintf(stderr, "%s: unrecognized function name: %s\n", argv[0], argv[i]);
        return 2;
    }

    /* no function was named: run everything */
    if (!specific) {
% for function in sorted(gen.functions.values()):
  % if function.benchmark:
        starch_benchmark_all_${function.name}();
  % else:
        fprintf(stderr, "=== ${function.name} ===\n");
        fprintf(stderr, "  (no benchmark support defined)\n");
  % endif
% endfor
    }

    if (output_path) {
        FILE *out = fopen(output_path, "w");
        if (!out) {
            fprintf(stderr, "%s: cannot open %s: %s\n", argv[0], output_path, strerror(errno));
            return 1;
        }

        fprintf(out, "# generated by ");
        for (int i = 0; i < argc; ++i)
            fprintf(out, "%s ", argv[i]);
        fprintf(out, "\n\n");

        /* the result array is still NULL when nothing was measured; do not hand that to qsort */
        if (starch_benchmark_result_count > 0)
            qsort(starch_benchmark_results, starch_benchmark_result_count, sizeof(*starch_benchmark_results), starch_benchmark_compare_result);

        /* results are grouped by function name, fastest first; a blank line separates groups */
        const char *last_name = NULL;
        bool first = true;
        for (unsigned i = 0; i < starch_benchmark_result_count; ++i) {
            starch_benchmark_result *result = &starch_benchmark_results[i];
            if (last_name && strcmp(last_name, result->name) != 0) {
                fprintf(out, "\n");
                first = true;
            }
            last_name = result->name;
            if (starch_benchmark_top_only && !first)
                continue;
            fprintf(out, "%-40s %-40s  # %" PRIu64 " ns/call\n", result->name, result->impl, result->ns);
            first = false;
        }

        /* fclose flushes buffered output, so a failure there also means an incomplete file */
        bool write_ok = !ferror(out);
        if (fclose(out) != 0)
            write_ok = false;
        if (!write_ok) {
            fprintf(stderr, "%s: error writing %s\n", argv[0], output_path);
            return 1;
        }
        fprintf(stderr, "%s: wrote sorted wisdom to %s\n", argv[0], output_path);
    }

    return starch_benchmark_validation_failed ? 1 : 0;
}