Regenerate starch-generated code for starch update & aarch64 wisdom

This commit is contained in:
Oliver Jowett 2021-02-09 14:15:33 +08:00
parent 40c24b1c55
commit 132702cfa7
7 changed files with 757 additions and 718 deletions

View File

@ -112,230 +112,6 @@ static bool starch_benchmark_flavor_in_list(const char *flavor, const starch_ben
}
/* prototypes for benchmark helpers provided by user code */
void starch_magnitude_uc8_benchmark (void);
bool starch_magnitude_uc8_benchmark_verify ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
void starch_magnitude_uc8_benchmark(void);
/*
 * Benchmark one registered implementation of magnitude_uc8.
 *
 * _entry            registry entry for the implementation under test
 * arg0, arg1, arg2  forwarded unchanged to the implementation and to
 *                   starch_magnitude_uc8_benchmark_verify()
 *
 * Progress and the outcome are written to stderr. The entry is skipped
 * (with a message) when its flavor is unsupported, filtered out by the
 * whitelist/blacklist, or fails verification. On a successful timed run the
 * per-call time is appended to starch_benchmark_results, growing that array
 * as needed; the process exits if the array cannot be grown.
 */
static void starch_benchmark_one_magnitude_uc8( starch_magnitude_uc8_regentry * _entry, const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
{
    fprintf(stderr, " %-40s ", _entry->name);

    /* test for support */
    if (_entry->flavor_supported && !(_entry->flavor_supported())) {
        fprintf(stderr, "unsupported\n");
        return;
    }
    if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
        fprintf(stderr, "skipped (not whitelisted)\n");
        return;
    }
    if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
        fprintf(stderr, "skipped (blacklisted)\n");
        return;
    }
    if (starch_benchmark_list_only) {
        fprintf(stderr, "supported\n");
        return;
    }

    /* initial warmup */
    for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
        _entry->callable ( arg0, arg1, arg2 );

    /* verify correctness of the output */
    if (! starch_magnitude_uc8_benchmark_verify ( arg0, arg1, arg2 )) {
        fprintf(stderr, "skipped (verification failed)\n");
        starch_benchmark_validation_failed = true;
        return;
    }
    if (starch_benchmark_validate_only) {
        fprintf(stderr, "validation ok\n");
        return;
    }

    /* with no timed passes there is nothing to average, and the per-call
     * computation below would divide by zero */
    if (!starch_benchmark_iterations) {
        fprintf(stderr, "skipped (no iterations)\n");
        return;
    }

    /* pre-benchmark, find a loop count that takes at least 100ms */
    starch_benchmark_time _start, _end;
    uint64_t _elapsed = 0;
    uint64_t _loops = 127;
    while (_elapsed < 100000000) {
        _loops *= 2;
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        _elapsed = starch_benchmark_elapsed(&_start, &_end);
    }

    /* real benchmark, run for approx 1 second */
    _loops = _loops * 1000000000 / _elapsed;
    /* a very slow implementation can scale the count down to zero; always
     * time at least one call so the divisions by _loops stay defined */
    if (_loops == 0)
        _loops = 1;

    _elapsed = 0;
    uint64_t _elapsed_min = UINT64_MAX;
    uint64_t _elapsed_max = 0;
    for (unsigned _iter = 0; _iter < starch_benchmark_iterations; ++_iter) {
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
        if (_elapsed_one < _elapsed_min)
            _elapsed_min = _elapsed_one;
        if (_elapsed_one > _elapsed_max)
            _elapsed_max = _elapsed_one;
        _elapsed += _elapsed_one;
    }

    /* with more than two passes, drop the fastest and slowest pass before
     * averaging; otherwise average everything */
    uint64_t _per_loop;
    if (starch_benchmark_iterations > 2)
        _per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (starch_benchmark_iterations - 2);
    else
        _per_loop = _elapsed / _loops / starch_benchmark_iterations;
    fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);

    /* grow the result array (64 slots initially, then doubling) when full */
    if (starch_benchmark_result_count >= starch_benchmark_result_size) {
        if (!starch_benchmark_result_size)
            starch_benchmark_result_size = 64;
        else
            starch_benchmark_result_size *= 2;
        starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
        if (!starch_benchmark_results) {
            fprintf(stderr, "realloc: %s\n", strerror(errno));
            exit(1);
        }
    }

    starch_benchmark_results[starch_benchmark_result_count].name = "magnitude_uc8";
    starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
    starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
    ++starch_benchmark_result_count;
}
/* Benchmark every implementation listed in starch_magnitude_uc8_registry, in
 * registry order, passing the same arguments to each. The registry is walked
 * until the entry whose name is NULL. */
static void starch_benchmark_run_magnitude_uc8( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
{
    starch_magnitude_uc8_regentry *_impl = starch_magnitude_uc8_registry;

    while (_impl->name) {
        starch_benchmark_one_magnitude_uc8( _impl, arg0, arg1, arg2 );
        ++_impl;
    }
}
/* prototypes for benchmark helpers provided by user code */
void starch_magnitude_uc8_aligned_benchmark (void);
bool starch_magnitude_uc8_aligned_benchmark_verify ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
void starch_magnitude_uc8_aligned_benchmark(void);
/*
 * Benchmark one registered implementation of magnitude_uc8_aligned.
 *
 * _entry            registry entry for the implementation under test
 * arg0, arg1, arg2  forwarded unchanged to the implementation and to
 *                   starch_magnitude_uc8_aligned_benchmark_verify()
 *
 * Progress and the outcome are written to stderr. The entry is skipped
 * (with a message) when its flavor is unsupported, filtered out by the
 * whitelist/blacklist, or fails verification. On a successful timed run the
 * per-call time is appended to starch_benchmark_results, growing that array
 * as needed; the process exits if the array cannot be grown.
 */
static void starch_benchmark_one_magnitude_uc8_aligned( starch_magnitude_uc8_aligned_regentry * _entry, const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
{
    fprintf(stderr, " %-40s ", _entry->name);

    /* test for support */
    if (_entry->flavor_supported && !(_entry->flavor_supported())) {
        fprintf(stderr, "unsupported\n");
        return;
    }
    if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
        fprintf(stderr, "skipped (not whitelisted)\n");
        return;
    }
    if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
        fprintf(stderr, "skipped (blacklisted)\n");
        return;
    }
    if (starch_benchmark_list_only) {
        fprintf(stderr, "supported\n");
        return;
    }

    /* initial warmup */
    for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
        _entry->callable ( arg0, arg1, arg2 );

    /* verify correctness of the output */
    if (! starch_magnitude_uc8_aligned_benchmark_verify ( arg0, arg1, arg2 )) {
        fprintf(stderr, "skipped (verification failed)\n");
        starch_benchmark_validation_failed = true;
        return;
    }
    if (starch_benchmark_validate_only) {
        fprintf(stderr, "validation ok\n");
        return;
    }

    /* with no timed passes there is nothing to average, and the per-call
     * computation below would divide by zero */
    if (!starch_benchmark_iterations) {
        fprintf(stderr, "skipped (no iterations)\n");
        return;
    }

    /* pre-benchmark, find a loop count that takes at least 100ms */
    starch_benchmark_time _start, _end;
    uint64_t _elapsed = 0;
    uint64_t _loops = 127;
    while (_elapsed < 100000000) {
        _loops *= 2;
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        _elapsed = starch_benchmark_elapsed(&_start, &_end);
    }

    /* real benchmark, run for approx 1 second */
    _loops = _loops * 1000000000 / _elapsed;
    /* a very slow implementation can scale the count down to zero; always
     * time at least one call so the divisions by _loops stay defined */
    if (_loops == 0)
        _loops = 1;

    _elapsed = 0;
    uint64_t _elapsed_min = UINT64_MAX;
    uint64_t _elapsed_max = 0;
    for (unsigned _iter = 0; _iter < starch_benchmark_iterations; ++_iter) {
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
        if (_elapsed_one < _elapsed_min)
            _elapsed_min = _elapsed_one;
        if (_elapsed_one > _elapsed_max)
            _elapsed_max = _elapsed_one;
        _elapsed += _elapsed_one;
    }

    /* with more than two passes, drop the fastest and slowest pass before
     * averaging; otherwise average everything */
    uint64_t _per_loop;
    if (starch_benchmark_iterations > 2)
        _per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (starch_benchmark_iterations - 2);
    else
        _per_loop = _elapsed / _loops / starch_benchmark_iterations;
    fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);

    /* grow the result array (64 slots initially, then doubling) when full */
    if (starch_benchmark_result_count >= starch_benchmark_result_size) {
        if (!starch_benchmark_result_size)
            starch_benchmark_result_size = 64;
        else
            starch_benchmark_result_size *= 2;
        starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
        if (!starch_benchmark_results) {
            fprintf(stderr, "realloc: %s\n", strerror(errno));
            exit(1);
        }
    }

    starch_benchmark_results[starch_benchmark_result_count].name = "magnitude_uc8_aligned";
    starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
    starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
    ++starch_benchmark_result_count;
}
/* Benchmark every implementation listed in
 * starch_magnitude_uc8_aligned_registry, in registry order, passing the same
 * arguments to each. The registry is walked until the entry whose name is
 * NULL. */
static void starch_benchmark_run_magnitude_uc8_aligned( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
{
    starch_magnitude_uc8_aligned_regentry *_impl = starch_magnitude_uc8_aligned_registry;

    while (_impl->name) {
        starch_benchmark_one_magnitude_uc8_aligned( _impl, arg0, arg1, arg2 );
        ++_impl;
    }
}
/* prototypes for benchmark helpers provided by user code */
void starch_magnitude_power_uc8_benchmark (void);
bool starch_magnitude_power_uc8_benchmark_verify ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
@ -1008,6 +784,230 @@ static void starch_benchmark_run_magnitude_sc16q11_aligned( const sc16_t * arg0,
}
}
/* prototypes for benchmark helpers provided by user code */
void starch_magnitude_uc8_benchmark (void);
bool starch_magnitude_uc8_benchmark_verify ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
void starch_magnitude_uc8_benchmark(void);
/*
 * Benchmark one registered implementation of magnitude_uc8.
 *
 * _entry            registry entry for the implementation under test
 * arg0, arg1, arg2  forwarded unchanged to the implementation and to
 *                   starch_magnitude_uc8_benchmark_verify()
 *
 * Progress and the outcome are written to stderr. The entry is skipped
 * (with a message) when its flavor is unsupported, filtered out by the
 * whitelist/blacklist, or fails verification. On a successful timed run the
 * per-call time is appended to starch_benchmark_results, growing that array
 * as needed; the process exits if the array cannot be grown.
 */
static void starch_benchmark_one_magnitude_uc8( starch_magnitude_uc8_regentry * _entry, const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
{
    fprintf(stderr, " %-40s ", _entry->name);

    /* test for support */
    if (_entry->flavor_supported && !(_entry->flavor_supported())) {
        fprintf(stderr, "unsupported\n");
        return;
    }
    if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
        fprintf(stderr, "skipped (not whitelisted)\n");
        return;
    }
    if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
        fprintf(stderr, "skipped (blacklisted)\n");
        return;
    }
    if (starch_benchmark_list_only) {
        fprintf(stderr, "supported\n");
        return;
    }

    /* initial warmup */
    for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
        _entry->callable ( arg0, arg1, arg2 );

    /* verify correctness of the output */
    if (! starch_magnitude_uc8_benchmark_verify ( arg0, arg1, arg2 )) {
        fprintf(stderr, "skipped (verification failed)\n");
        starch_benchmark_validation_failed = true;
        return;
    }
    if (starch_benchmark_validate_only) {
        fprintf(stderr, "validation ok\n");
        return;
    }

    /* with no timed passes there is nothing to average, and the per-call
     * computation below would divide by zero */
    if (!starch_benchmark_iterations) {
        fprintf(stderr, "skipped (no iterations)\n");
        return;
    }

    /* pre-benchmark, find a loop count that takes at least 100ms */
    starch_benchmark_time _start, _end;
    uint64_t _elapsed = 0;
    uint64_t _loops = 127;
    while (_elapsed < 100000000) {
        _loops *= 2;
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        _elapsed = starch_benchmark_elapsed(&_start, &_end);
    }

    /* real benchmark, run for approx 1 second */
    _loops = _loops * 1000000000 / _elapsed;
    /* a very slow implementation can scale the count down to zero; always
     * time at least one call so the divisions by _loops stay defined */
    if (_loops == 0)
        _loops = 1;

    _elapsed = 0;
    uint64_t _elapsed_min = UINT64_MAX;
    uint64_t _elapsed_max = 0;
    for (unsigned _iter = 0; _iter < starch_benchmark_iterations; ++_iter) {
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
        if (_elapsed_one < _elapsed_min)
            _elapsed_min = _elapsed_one;
        if (_elapsed_one > _elapsed_max)
            _elapsed_max = _elapsed_one;
        _elapsed += _elapsed_one;
    }

    /* with more than two passes, drop the fastest and slowest pass before
     * averaging; otherwise average everything */
    uint64_t _per_loop;
    if (starch_benchmark_iterations > 2)
        _per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (starch_benchmark_iterations - 2);
    else
        _per_loop = _elapsed / _loops / starch_benchmark_iterations;
    fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);

    /* grow the result array (64 slots initially, then doubling) when full */
    if (starch_benchmark_result_count >= starch_benchmark_result_size) {
        if (!starch_benchmark_result_size)
            starch_benchmark_result_size = 64;
        else
            starch_benchmark_result_size *= 2;
        starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
        if (!starch_benchmark_results) {
            fprintf(stderr, "realloc: %s\n", strerror(errno));
            exit(1);
        }
    }

    starch_benchmark_results[starch_benchmark_result_count].name = "magnitude_uc8";
    starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
    starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
    ++starch_benchmark_result_count;
}
/* Benchmark every implementation listed in starch_magnitude_uc8_registry, in
 * registry order, passing the same arguments to each. The registry is walked
 * until the entry whose name is NULL. */
static void starch_benchmark_run_magnitude_uc8( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
{
    starch_magnitude_uc8_regentry *_impl = starch_magnitude_uc8_registry;

    while (_impl->name) {
        starch_benchmark_one_magnitude_uc8( _impl, arg0, arg1, arg2 );
        ++_impl;
    }
}
/* prototypes for benchmark helpers provided by user code */
void starch_magnitude_uc8_aligned_benchmark (void);
bool starch_magnitude_uc8_aligned_benchmark_verify ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
void starch_magnitude_uc8_aligned_benchmark(void);
/*
 * Benchmark one registered implementation of magnitude_uc8_aligned.
 *
 * _entry            registry entry for the implementation under test
 * arg0, arg1, arg2  forwarded unchanged to the implementation and to
 *                   starch_magnitude_uc8_aligned_benchmark_verify()
 *
 * Progress and the outcome are written to stderr. The entry is skipped
 * (with a message) when its flavor is unsupported, filtered out by the
 * whitelist/blacklist, or fails verification. On a successful timed run the
 * per-call time is appended to starch_benchmark_results, growing that array
 * as needed; the process exits if the array cannot be grown.
 */
static void starch_benchmark_one_magnitude_uc8_aligned( starch_magnitude_uc8_aligned_regentry * _entry, const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
{
    fprintf(stderr, " %-40s ", _entry->name);

    /* test for support */
    if (_entry->flavor_supported && !(_entry->flavor_supported())) {
        fprintf(stderr, "unsupported\n");
        return;
    }
    if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
        fprintf(stderr, "skipped (not whitelisted)\n");
        return;
    }
    if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
        fprintf(stderr, "skipped (blacklisted)\n");
        return;
    }
    if (starch_benchmark_list_only) {
        fprintf(stderr, "supported\n");
        return;
    }

    /* initial warmup */
    for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
        _entry->callable ( arg0, arg1, arg2 );

    /* verify correctness of the output */
    if (! starch_magnitude_uc8_aligned_benchmark_verify ( arg0, arg1, arg2 )) {
        fprintf(stderr, "skipped (verification failed)\n");
        starch_benchmark_validation_failed = true;
        return;
    }
    if (starch_benchmark_validate_only) {
        fprintf(stderr, "validation ok\n");
        return;
    }

    /* with no timed passes there is nothing to average, and the per-call
     * computation below would divide by zero */
    if (!starch_benchmark_iterations) {
        fprintf(stderr, "skipped (no iterations)\n");
        return;
    }

    /* pre-benchmark, find a loop count that takes at least 100ms */
    starch_benchmark_time _start, _end;
    uint64_t _elapsed = 0;
    uint64_t _loops = 127;
    while (_elapsed < 100000000) {
        _loops *= 2;
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        _elapsed = starch_benchmark_elapsed(&_start, &_end);
    }

    /* real benchmark, run for approx 1 second */
    _loops = _loops * 1000000000 / _elapsed;
    /* a very slow implementation can scale the count down to zero; always
     * time at least one call so the divisions by _loops stay defined */
    if (_loops == 0)
        _loops = 1;

    _elapsed = 0;
    uint64_t _elapsed_min = UINT64_MAX;
    uint64_t _elapsed_max = 0;
    for (unsigned _iter = 0; _iter < starch_benchmark_iterations; ++_iter) {
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
        if (_elapsed_one < _elapsed_min)
            _elapsed_min = _elapsed_one;
        if (_elapsed_one > _elapsed_max)
            _elapsed_max = _elapsed_one;
        _elapsed += _elapsed_one;
    }

    /* with more than two passes, drop the fastest and slowest pass before
     * averaging; otherwise average everything */
    uint64_t _per_loop;
    if (starch_benchmark_iterations > 2)
        _per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (starch_benchmark_iterations - 2);
    else
        _per_loop = _elapsed / _loops / starch_benchmark_iterations;
    fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);

    /* grow the result array (64 slots initially, then doubling) when full */
    if (starch_benchmark_result_count >= starch_benchmark_result_size) {
        if (!starch_benchmark_result_size)
            starch_benchmark_result_size = 64;
        else
            starch_benchmark_result_size *= 2;
        starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
        if (!starch_benchmark_results) {
            fprintf(stderr, "realloc: %s\n", strerror(errno));
            exit(1);
        }
    }

    starch_benchmark_results[starch_benchmark_result_count].name = "magnitude_uc8_aligned";
    starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
    starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
    ++starch_benchmark_result_count;
}
/* Benchmark every implementation listed in
 * starch_magnitude_uc8_aligned_registry, in registry order, passing the same
 * arguments to each. The registry is walked until the entry whose name is
 * NULL. */
static void starch_benchmark_run_magnitude_uc8_aligned( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
{
    starch_magnitude_uc8_aligned_regentry *_impl = starch_magnitude_uc8_aligned_registry;

    while (_impl->name) {
        starch_benchmark_one_magnitude_uc8_aligned( _impl, arg0, arg1, arg2 );
        ++_impl;
    }
}
/* prototypes for benchmark helpers provided by user code */
void starch_mean_power_u16_benchmark (void);
bool starch_mean_power_u16_benchmark_verify ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
@ -1280,16 +1280,6 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
#include "../benchmark/magnitude_uc8_benchmark.c"
#include "../benchmark/mean_power_u16_benchmark.c"
/* Print the section banner for magnitude_uc8 on stderr, then hand over to the
 * user-provided benchmark driver. */
static void starch_benchmark_all_magnitude_uc8(void)
{
    fputs("==== magnitude_uc8 ===\n", stderr);
    starch_magnitude_uc8_benchmark();
}
/* Print the section banner for magnitude_uc8_aligned on stderr, then hand
 * over to the user-provided benchmark driver. */
static void starch_benchmark_all_magnitude_uc8_aligned(void)
{
    fputs("==== magnitude_uc8_aligned ===\n", stderr);
    starch_magnitude_uc8_aligned_benchmark();
}
static void starch_benchmark_all_magnitude_power_uc8(void)
{
fprintf(stderr, "==== magnitude_power_uc8 ===\n");
@ -1320,6 +1310,16 @@ static void starch_benchmark_all_magnitude_sc16q11_aligned(void)
fprintf(stderr, "==== magnitude_sc16q11_aligned ===\n");
starch_magnitude_sc16q11_aligned_benchmark ();
}
/* Print the section banner for magnitude_uc8 on stderr, then hand over to the
 * user-provided benchmark driver. */
static void starch_benchmark_all_magnitude_uc8(void)
{
    fputs("==== magnitude_uc8 ===\n", stderr);
    starch_magnitude_uc8_benchmark();
}
/* Print the section banner for magnitude_uc8_aligned on stderr, then hand
 * over to the user-provided benchmark driver. */
static void starch_benchmark_all_magnitude_uc8_aligned(void)
{
    fputs("==== magnitude_uc8_aligned ===\n", stderr);
    starch_magnitude_uc8_aligned_benchmark();
}
static void starch_benchmark_all_mean_power_u16(void)
{
fprintf(stderr, "==== mean_power_u16 ===\n");
@ -1369,28 +1369,28 @@ static void starch_benchmark_usage(const char *argv0)
" (default: benchmark all functions)\n"
"\n"
"Supported flavors: "
#ifdef STARCH_FLAVOR_GENERIC
"generic "
#endif
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
"armv7a_neon_vfpv4 "
#endif
#ifdef STARCH_FLAVOR_ARMV8_NEON_SIMD
"armv8_neon_simd "
#endif
#ifdef STARCH_FLAVOR_GENERIC
"generic "
#endif
#ifdef STARCH_FLAVOR_X86_AVX2
"x86_avx2 "
#endif
"\n"
"Supported functions: "
"magnitude_uc8 "
"magnitude_uc8_aligned "
"magnitude_power_uc8 "
"magnitude_power_uc8_aligned "
"magnitude_sc16 "
"magnitude_sc16_aligned "
"magnitude_sc16q11 "
"magnitude_sc16q11_aligned "
"magnitude_uc8 "
"magnitude_uc8_aligned "
"mean_power_u16 "
"mean_power_u16_aligned "
"\n", argv0);
@ -1478,16 +1478,6 @@ int main(int argc, char **argv)
}
for (int i = optind; i < argc; ++i) {
if (!strcmp(argv[i], "magnitude_uc8")) {
specific = 1;
starch_benchmark_all_magnitude_uc8();
continue;
}
if (!strcmp(argv[i], "magnitude_uc8_aligned")) {
specific = 1;
starch_benchmark_all_magnitude_uc8_aligned();
continue;
}
if (!strcmp(argv[i], "magnitude_power_uc8")) {
specific = 1;
starch_benchmark_all_magnitude_power_uc8();
@ -1518,6 +1508,16 @@ int main(int argc, char **argv)
starch_benchmark_all_magnitude_sc16q11_aligned();
continue;
}
if (!strcmp(argv[i], "magnitude_uc8")) {
specific = 1;
starch_benchmark_all_magnitude_uc8();
continue;
}
if (!strcmp(argv[i], "magnitude_uc8_aligned")) {
specific = 1;
starch_benchmark_all_magnitude_uc8_aligned();
continue;
}
if (!strcmp(argv[i], "mean_power_u16")) {
specific = 1;
starch_benchmark_all_mean_power_u16();
@ -1534,14 +1534,14 @@ int main(int argc, char **argv)
}
if (!specific) {
starch_benchmark_all_magnitude_uc8();
starch_benchmark_all_magnitude_uc8_aligned();
starch_benchmark_all_magnitude_power_uc8();
starch_benchmark_all_magnitude_power_uc8_aligned();
starch_benchmark_all_magnitude_sc16();
starch_benchmark_all_magnitude_sc16_aligned();
starch_benchmark_all_magnitude_sc16q11();
starch_benchmark_all_magnitude_sc16q11_aligned();
starch_benchmark_all_magnitude_uc8();
starch_benchmark_all_magnitude_uc8_aligned();
starch_benchmark_all_mean_power_u16();
starch_benchmark_all_mean_power_u16_aligned();
}

View File

@ -19,199 +19,6 @@ static int starch_regentry_rank_compare (const void *l, const void *r)
return left->rank - right->rank;
}
/* dispatcher / registry for magnitude_uc8 */
/* Return the first entry of starch_magnitude_uc8_registry that is usable:
 * one with no support probe, or whose probe reports success. Returns NULL
 * when the terminating (NULL-named) entry is reached without a match. */
starch_magnitude_uc8_regentry * starch_magnitude_uc8_select() {
    starch_magnitude_uc8_regentry *candidate = starch_magnitude_uc8_registry;

    while (candidate->name) {
        if (!candidate->flavor_supported || candidate->flavor_supported())
            return candidate;
        ++candidate;
    }

    return NULL;
}
/* First-call trampoline for starch_magnitude_uc8: pick an implementation,
 * install it in the public function pointer so later calls bypass this
 * function, then forward the current call. Aborts if nothing is usable. */
static void starch_magnitude_uc8_dispatch ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 ) {
    starch_magnitude_uc8_regentry *chosen = starch_magnitude_uc8_select();

    if (chosen == NULL)
        abort();

    starch_magnitude_uc8 = chosen->callable;
    starch_magnitude_uc8 ( arg0, arg1, arg2 );
}

/* Public entry point; starts out pointing at the trampoline above. */
starch_magnitude_uc8_ptr starch_magnitude_uc8 = starch_magnitude_uc8_dispatch;
/*
 * Re-order starch_magnitude_uc8_registry according to a preference list.
 *
 * received_wisdom  NULL-terminated array of implementation names, most
 *                  preferred first.
 *
 * Entries named in the list are ranked by their position in it; all other
 * entries are ranked after the end of the list, keeping their current
 * relative order. The public function pointer is reset to the dispatcher so
 * the next call selects again from the re-sorted registry.
 */
void starch_magnitude_uc8_set_wisdom (const char * const * received_wisdom)
{
    starch_magnitude_uc8_regentry *slot = starch_magnitude_uc8_registry;

    while (slot->name) {
        /* locate this implementation's name in the wisdom list, stopping at
         * the list terminator if it is absent */
        const char * const *hint = received_wisdom;
        while (*hint && strcmp(*hint, slot->name) != 0)
            ++hint;

        if (!*hint) {
            /* not listed: list length plus current registry index */
            slot->rank = (hint - received_wisdom) + (slot - starch_magnitude_uc8_registry);
        } else {
            /* listed: position within the wisdom list */
            slot->rank = hint - received_wisdom;
        }

        ++slot;
    }

    /* slot now points at the terminator, so the difference is the entry count */
    qsort(starch_magnitude_uc8_registry, slot - starch_magnitude_uc8_registry, sizeof(starch_magnitude_uc8_regentry), starch_regentry_rank_compare);

    /* force re-selection on the next call */
    starch_magnitude_uc8 = starch_magnitude_uc8_dispatch;
}
/*
 * Implementation table for magnitude_uc8.
 *
 * Each row is initialised positionally; judging by how the fields are used
 * elsewhere in this file the order is presumably
 *   { rank, name, flavor, callable, flavor_supported }
 * (confirm against the starch_magnitude_uc8_regentry declaration). A NULL
 * in the last column means the entry has no runtime support probe.
 *
 * starch_magnitude_uc8_select() returns the first usable row, so the row
 * order within the compiled-in STARCH_MIX_* section is the default
 * preference order. starch_magnitude_uc8_set_wisdom() rewrites the ranks and
 * re-sorts this array in place, which is why it is not const.
 *
 * NOTE(review): the sections are not mutually exclusive in the preprocessor;
 * presumably the build defines exactly one STARCH_MIX_* macro - confirm.
 */
starch_magnitude_uc8_regentry starch_magnitude_uc8_registry[] = {
#ifdef STARCH_MIX_GENERIC
{ 0, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_ARM
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "lookup_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 3, "lookup_unroll_4_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 4, "exact_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_exact_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 5, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 6, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
{ 3, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "exact_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_exact_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "lookup_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_x86_avx2, cpu_supports_avx2 },
{ 3, "exact_x86_avx2", "x86_avx2", starch_magnitude_uc8_exact_x86_avx2, cpu_supports_avx2 },
{ 4, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 5, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_X86 */
/* terminator: every walker of this table stops at the NULL name */
{ 0, NULL, NULL, NULL, NULL }
};
/* dispatcher / registry for magnitude_uc8_aligned */
/* Return the first entry of starch_magnitude_uc8_aligned_registry that is
 * usable: one with no support probe, or whose probe reports success. Returns
 * NULL when the terminating (NULL-named) entry is reached without a match. */
starch_magnitude_uc8_aligned_regentry * starch_magnitude_uc8_aligned_select() {
    starch_magnitude_uc8_aligned_regentry *candidate = starch_magnitude_uc8_aligned_registry;

    while (candidate->name) {
        if (!candidate->flavor_supported || candidate->flavor_supported())
            return candidate;
        ++candidate;
    }

    return NULL;
}
/* First-call trampoline for starch_magnitude_uc8_aligned: pick an
 * implementation, install it in the public function pointer so later calls
 * bypass this function, then forward the current call. Aborts if nothing is
 * usable. */
static void starch_magnitude_uc8_aligned_dispatch ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 ) {
    starch_magnitude_uc8_aligned_regentry *chosen = starch_magnitude_uc8_aligned_select();

    if (chosen == NULL)
        abort();

    starch_magnitude_uc8_aligned = chosen->callable;
    starch_magnitude_uc8_aligned ( arg0, arg1, arg2 );
}

/* Public entry point; starts out pointing at the trampoline above. */
starch_magnitude_uc8_aligned_ptr starch_magnitude_uc8_aligned = starch_magnitude_uc8_aligned_dispatch;
/*
 * Re-order starch_magnitude_uc8_aligned_registry according to a preference
 * list.
 *
 * received_wisdom  NULL-terminated array of implementation names, most
 *                  preferred first.
 *
 * Entries named in the list are ranked by their position in it; all other
 * entries are ranked after the end of the list, keeping their current
 * relative order. The public function pointer is reset to the dispatcher so
 * the next call selects again from the re-sorted registry.
 */
void starch_magnitude_uc8_aligned_set_wisdom (const char * const * received_wisdom)
{
    starch_magnitude_uc8_aligned_regentry *slot = starch_magnitude_uc8_aligned_registry;

    while (slot->name) {
        /* locate this implementation's name in the wisdom list, stopping at
         * the list terminator if it is absent */
        const char * const *hint = received_wisdom;
        while (*hint && strcmp(*hint, slot->name) != 0)
            ++hint;

        if (!*hint) {
            /* not listed: list length plus current registry index */
            slot->rank = (hint - received_wisdom) + (slot - starch_magnitude_uc8_aligned_registry);
        } else {
            /* listed: position within the wisdom list */
            slot->rank = hint - received_wisdom;
        }

        ++slot;
    }

    /* slot now points at the terminator, so the difference is the entry count */
    qsort(starch_magnitude_uc8_aligned_registry, slot - starch_magnitude_uc8_aligned_registry, sizeof(starch_magnitude_uc8_aligned_regentry), starch_regentry_rank_compare);

    /* force re-selection on the next call */
    starch_magnitude_uc8_aligned = starch_magnitude_uc8_aligned_dispatch;
}
/*
 * Implementation table for magnitude_uc8_aligned.
 *
 * Each row is initialised positionally; judging by how the fields are used
 * elsewhere in this file the order is presumably
 *   { rank, name, flavor, callable, flavor_supported }
 * (confirm against the starch_magnitude_uc8_aligned_regentry declaration).
 * A NULL in the last column means the entry has no runtime support probe.
 *
 * Rows whose name ends in "_aligned" point at starch_magnitude_uc8_aligned_*
 * functions; the remaining rows reuse the plain starch_magnitude_uc8_*
 * implementations.
 *
 * starch_magnitude_uc8_aligned_select() returns the first usable row, so the
 * row order within the compiled-in STARCH_MIX_* section is the default
 * preference order. starch_magnitude_uc8_aligned_set_wisdom() rewrites the
 * ranks and re-sorts this array in place, which is why it is not const.
 *
 * NOTE(review): the sections are not mutually exclusive in the preprocessor;
 * presumably the build defines exactly one STARCH_MIX_* macro - confirm.
 */
starch_magnitude_uc8_aligned_regentry starch_magnitude_uc8_aligned_registry[] = {
#ifdef STARCH_MIX_GENERIC
{ 0, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_ARM
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "lookup_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 3, "lookup_unroll_4_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 4, "exact_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 5, "lookup_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 6, "lookup_unroll_4_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 7, "exact_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_exact_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 8, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 9, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 10, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
{ 3, "lookup_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "lookup_unroll_4_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "exact_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_exact_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 8, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 9, "exact_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_exact_armv8_neon_simd, cpu_supports_armv8_simd },
{ 10, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "lookup_x86_avx2_aligned", "x86_avx2", starch_magnitude_uc8_aligned_lookup_x86_avx2, cpu_supports_avx2 },
{ 3, "lookup_unroll_4_x86_avx2_aligned", "x86_avx2", starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
{ 4, "exact_x86_avx2_aligned", "x86_avx2", starch_magnitude_uc8_aligned_exact_x86_avx2, cpu_supports_avx2 },
{ 5, "lookup_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_x86_avx2, cpu_supports_avx2 },
{ 6, "exact_x86_avx2", "x86_avx2", starch_magnitude_uc8_exact_x86_avx2, cpu_supports_avx2 },
{ 7, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 8, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_X86 */
/* terminator: every walker of this table stops at the NULL name */
{ 0, NULL, NULL, NULL, NULL }
};
/* dispatcher / registry for magnitude_power_uc8 */
starch_magnitude_power_uc8_regentry * starch_magnitude_power_uc8_select() {
@ -266,11 +73,15 @@ void starch_magnitude_power_uc8_set_wisdom (const char * const * received_wisdom
starch_magnitude_power_uc8_regentry starch_magnitude_power_uc8_registry[] = {
#ifdef STARCH_MIX_GENERIC
{ 0, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
{ 1, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
{ 2, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_AARCH64
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
{ 2, "twopass_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
{ 6, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_ARM
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_power_uc8_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
@ -282,15 +93,11 @@ starch_magnitude_power_uc8_regentry starch_magnitude_power_uc8_registry[] = {
{ 6, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "twopass_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
{ 5, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
{ 6, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_GENERIC
{ 0, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
{ 1, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
{ 2, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_X86
{ 0, "twopass_x86_avx2", "x86_avx2", starch_magnitude_power_uc8_twopass_x86_avx2, cpu_supports_avx2 },
@ -357,11 +164,19 @@ void starch_magnitude_power_uc8_aligned_set_wisdom (const char * const * receive
starch_magnitude_power_uc8_aligned_regentry starch_magnitude_power_uc8_aligned_registry[] = {
#ifdef STARCH_MIX_GENERIC
{ 0, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
{ 1, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
{ 2, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_AARCH64
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
{ 2, "twopass_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "lookup_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "lookup_unroll_4_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "twopass_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 8, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 9, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
{ 10, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_ARM
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
@ -377,19 +192,11 @@ starch_magnitude_power_uc8_aligned_regentry starch_magnitude_power_uc8_aligned_r
{ 10, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "twopass_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "lookup_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "lookup_unroll_4_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "twopass_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 8, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
{ 9, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
{ 10, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_GENERIC
{ 0, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
{ 1, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
{ 2, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_X86
{ 0, "twopass_x86_avx2_aligned", "x86_avx2", starch_magnitude_power_uc8_aligned_twopass_x86_avx2, cpu_supports_avx2 },
@ -459,10 +266,13 @@ void starch_magnitude_sc16_set_wisdom (const char * const * received_wisdom)
starch_magnitude_sc16_regentry starch_magnitude_sc16_registry[] = {
#ifdef STARCH_MIX_GENERIC
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_AARCH64
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
{ 2, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_ARM
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
@ -472,13 +282,10 @@ starch_magnitude_sc16_regentry starch_magnitude_sc16_registry[] = {
{ 4, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
{ 4, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_GENERIC
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_X86
{ 0, "exact_float_x86_avx2", "x86_avx2", starch_magnitude_sc16_exact_float_x86_avx2, cpu_supports_avx2 },
@ -543,10 +350,16 @@ void starch_magnitude_sc16_aligned_set_wisdom (const char * const * received_wis
starch_magnitude_sc16_aligned_regentry starch_magnitude_sc16_aligned_registry[] = {
#ifdef STARCH_MIX_GENERIC
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_AARCH64
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
{ 2, "exact_u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "exact_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_ARM
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
@ -559,16 +372,10 @@ starch_magnitude_sc16_aligned_regentry starch_magnitude_sc16_aligned_registry[]
{ 7, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "exact_u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "exact_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
{ 7, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_GENERIC
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_X86
{ 0, "exact_float_x86_avx2_aligned", "x86_avx2", starch_magnitude_sc16_aligned_exact_float_x86_avx2, cpu_supports_avx2 },
@ -635,12 +442,17 @@ void starch_magnitude_sc16q11_set_wisdom (const char * const * received_wisdom)
starch_magnitude_sc16q11_regentry starch_magnitude_sc16q11_registry[] = {
#ifdef STARCH_MIX_GENERIC
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
{ 2, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
{ 3, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_AARCH64
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
{ 2, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "11bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "12bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
{ 7, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
{ 8, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_ARM
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_sc16q11_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
@ -654,17 +466,12 @@ starch_magnitude_sc16q11_regentry starch_magnitude_sc16q11_registry[] = {
{ 8, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "11bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "12bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
{ 6, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
{ 7, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
{ 8, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_GENERIC
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
{ 2, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
{ 3, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_X86
{ 0, "exact_float_x86_avx2", "x86_avx2", starch_magnitude_sc16q11_exact_float_x86_avx2, cpu_supports_avx2 },
@ -733,12 +540,22 @@ void starch_magnitude_sc16q11_aligned_set_wisdom (const char * const * received_
starch_magnitude_sc16q11_aligned_regentry starch_magnitude_sc16q11_aligned_registry[] = {
#ifdef STARCH_MIX_GENERIC
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
{ 2, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
{ 3, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_AARCH64
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
{ 2, "exact_u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "exact_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "11bit_table_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "12bit_table_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 8, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 9, "11bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 10, "12bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 11, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
{ 12, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
{ 13, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_ARM
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
@ -757,22 +574,12 @@ starch_magnitude_sc16q11_aligned_regentry starch_magnitude_sc16q11_aligned_regis
{ 13, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "exact_u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "exact_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "11bit_table_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "12bit_table_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "11bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 8, "12bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 9, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 10, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
{ 11, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
{ 12, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
{ 13, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_GENERIC
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
{ 2, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
{ 3, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_X86
{ 0, "exact_float_x86_avx2_aligned", "x86_avx2", starch_magnitude_sc16q11_aligned_exact_float_x86_avx2, cpu_supports_avx2 },
@ -791,6 +598,199 @@ starch_magnitude_sc16q11_aligned_regentry starch_magnitude_sc16q11_aligned_regis
{ 0, NULL, NULL, NULL, NULL }
};
/* dispatcher / registry for magnitude_uc8 */
/*
 * Choose the implementation of magnitude_uc8 to use.
 *
 * Walks starch_magnitude_uc8_registry in its current order and returns the
 * first entry that is usable: an entry with no flavor_supported callback is
 * always accepted, otherwise the callback must return true.
 *
 * Returns NULL if no entry in the registry is usable.
 *
 * Declared with an explicit (void) parameter list so that the definition is
 * a real prototype; an empty "()" list leaves the parameters unspecified.
 */
starch_magnitude_uc8_regentry * starch_magnitude_uc8_select(void) {
    starch_magnitude_uc8_regentry *candidate = starch_magnitude_uc8_registry;

    /* the registry is terminated by an entry whose name is NULL */
    for (; candidate->name; ++candidate) {
        if (!candidate->flavor_supported || candidate->flavor_supported())
            return candidate;
    }

    return NULL;
}
/*
 * Self-replacing trampoline for magnitude_uc8.
 *
 * Asks starch_magnitude_uc8_select() for an implementation, stores its
 * callable in the starch_magnitude_uc8 pointer so later calls go straight
 * to it, then forwards this call's arguments through that pointer.
 * Aborts the process if no implementation could be selected.
 */
static void starch_magnitude_uc8_dispatch ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 ) {
    starch_magnitude_uc8_regentry *chosen = starch_magnitude_uc8_select();

    if (chosen == NULL) {
        /* nothing usable in the registry; there is no way to continue */
        abort();
    }

    starch_magnitude_uc8 = chosen->callable;
    starch_magnitude_uc8 ( arg0, arg1, arg2 );
}
/* Current implementation pointer for magnitude_uc8. It starts out pointing at
 * the dispatcher, which overwrites it with the selected implementation's
 * callable on the first call; set_wisdom points it back at the dispatcher. */
starch_magnitude_uc8_ptr starch_magnitude_uc8 = starch_magnitude_uc8_dispatch;
/*
 * Re-order the magnitude_uc8 registry according to a wisdom list.
 *
 * received_wisdom is a NULL-terminated array of implementation names.
 * Entries whose name appears in the list are ranked by the position of the
 * first match; all other entries are ranked after every possible match,
 * keeping their current relative order. The registry is then re-sorted by
 * rank and the implementation pointer is reset to the dispatcher so that
 * the next call selects again.
 */
void starch_magnitude_uc8_set_wisdom (const char * const * received_wisdom)
{
    /* locate the terminating NULL of the wisdom list once */
    const char * const *wisdom_end = received_wisdom;
    while (*wisdom_end)
        ++wisdom_end;

    /* assign a new rank to every registry entry */
    starch_magnitude_uc8_regentry *slot = starch_magnitude_uc8_registry;
    while (slot->name) {
        const char * const *hit = received_wisdom;
        while (hit != wisdom_end && strcmp(*hit, slot->name) != 0)
            ++hit;

        if (hit != wisdom_end) {
            /* named in the wisdom list: rank is its position in that list */
            slot->rank = hit - received_wisdom;
        } else {
            /* not named: sort after all wisdom entries, in existing order */
            slot->rank = (wisdom_end - received_wisdom) + (slot - starch_magnitude_uc8_registry);
        }

        ++slot;
    }

    /* slot now points at the sentinel, so the difference is the entry count */
    qsort(starch_magnitude_uc8_registry, slot - starch_magnitude_uc8_registry, sizeof(starch_magnitude_uc8_regentry), starch_regentry_rank_compare);

    /* force re-selection on the next call */
    starch_magnitude_uc8 = starch_magnitude_uc8_dispatch;
}
/*
 * Candidate implementations of magnitude_uc8, in initial preference order.
 *
 * starch_magnitude_uc8_select() returns the first usable entry, and
 * starch_magnitude_uc8_set_wisdom() rewrites the ranks and re-sorts this
 * array in place, which is why it is not const.
 *
 * NOTE(review): the struct definition is not visible here; the positional
 * initializers appear to be { rank, name, flavor, callable, flavor_supported }
 * -- confirm against the regentry declaration. A NULL flavor_supported means
 * the entry is accepted without a CPU check.
 *
 * Each STARCH_MIX_* section restarts its ranks at 0, so presumably exactly one
 * mix is defined per build -- TODO confirm against the build configuration.
 */
starch_magnitude_uc8_regentry starch_magnitude_uc8_registry[] = {
#ifdef STARCH_MIX_AARCH64
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "exact_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_exact_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 6, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_ARM
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "lookup_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 3, "lookup_unroll_4_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 4, "exact_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_exact_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 5, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 6, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_GENERIC
{ 0, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_X86
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "lookup_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_x86_avx2, cpu_supports_avx2 },
{ 3, "exact_x86_avx2", "x86_avx2", starch_magnitude_uc8_exact_x86_avx2, cpu_supports_avx2 },
{ 4, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 5, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_X86 */
/* sentinel: the NULL name marks the end of the registry for every loop over it */
{ 0, NULL, NULL, NULL, NULL }
};
/* dispatcher / registry for magnitude_uc8_aligned */
/*
 * Choose the implementation of magnitude_uc8_aligned to use.
 *
 * Walks starch_magnitude_uc8_aligned_registry in its current order and
 * returns the first entry that is usable: an entry with no flavor_supported
 * callback is always accepted, otherwise the callback must return true.
 *
 * Returns NULL if no entry in the registry is usable.
 *
 * Declared with an explicit (void) parameter list so that the definition is
 * a real prototype; an empty "()" list leaves the parameters unspecified.
 */
starch_magnitude_uc8_aligned_regentry * starch_magnitude_uc8_aligned_select(void) {
    starch_magnitude_uc8_aligned_regentry *candidate = starch_magnitude_uc8_aligned_registry;

    /* the registry is terminated by an entry whose name is NULL */
    for (; candidate->name; ++candidate) {
        if (!candidate->flavor_supported || candidate->flavor_supported())
            return candidate;
    }

    return NULL;
}
/*
 * Self-replacing trampoline for magnitude_uc8_aligned.
 *
 * Asks starch_magnitude_uc8_aligned_select() for an implementation, stores
 * its callable in the starch_magnitude_uc8_aligned pointer so later calls go
 * straight to it, then forwards this call's arguments through that pointer.
 * Aborts the process if no implementation could be selected.
 */
static void starch_magnitude_uc8_aligned_dispatch ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 ) {
    starch_magnitude_uc8_aligned_regentry *chosen = starch_magnitude_uc8_aligned_select();

    if (chosen == NULL) {
        /* nothing usable in the registry; there is no way to continue */
        abort();
    }

    starch_magnitude_uc8_aligned = chosen->callable;
    starch_magnitude_uc8_aligned ( arg0, arg1, arg2 );
}
/* Current implementation pointer for magnitude_uc8_aligned. It starts out
 * pointing at the dispatcher, which overwrites it with the selected
 * implementation's callable on the first call; set_wisdom points it back at
 * the dispatcher. */
starch_magnitude_uc8_aligned_ptr starch_magnitude_uc8_aligned = starch_magnitude_uc8_aligned_dispatch;
/*
 * Re-order the magnitude_uc8_aligned registry according to a wisdom list.
 *
 * received_wisdom is a NULL-terminated array of implementation names.
 * Entries whose name appears in the list are ranked by the position of the
 * first match; all other entries are ranked after every possible match,
 * keeping their current relative order. The registry is then re-sorted by
 * rank and the implementation pointer is reset to the dispatcher so that
 * the next call selects again.
 */
void starch_magnitude_uc8_aligned_set_wisdom (const char * const * received_wisdom)
{
    /* locate the terminating NULL of the wisdom list once */
    const char * const *wisdom_end = received_wisdom;
    while (*wisdom_end)
        ++wisdom_end;

    /* assign a new rank to every registry entry */
    starch_magnitude_uc8_aligned_regentry *slot = starch_magnitude_uc8_aligned_registry;
    while (slot->name) {
        const char * const *hit = received_wisdom;
        while (hit != wisdom_end && strcmp(*hit, slot->name) != 0)
            ++hit;

        if (hit != wisdom_end) {
            /* named in the wisdom list: rank is its position in that list */
            slot->rank = hit - received_wisdom;
        } else {
            /* not named: sort after all wisdom entries, in existing order */
            slot->rank = (wisdom_end - received_wisdom) + (slot - starch_magnitude_uc8_aligned_registry);
        }

        ++slot;
    }

    /* slot now points at the sentinel, so the difference is the entry count */
    qsort(starch_magnitude_uc8_aligned_registry, slot - starch_magnitude_uc8_aligned_registry, sizeof(starch_magnitude_uc8_aligned_regentry), starch_regentry_rank_compare);

    /* force re-selection on the next call */
    starch_magnitude_uc8_aligned = starch_magnitude_uc8_aligned_dispatch;
}
/*
 * Candidate implementations of magnitude_uc8_aligned, in initial preference
 * order.
 *
 * starch_magnitude_uc8_aligned_select() returns the first usable entry, and
 * starch_magnitude_uc8_aligned_set_wisdom() rewrites the ranks and re-sorts
 * this array in place, which is why it is not const.
 *
 * The table lists both the "_aligned" variants and the plain magnitude_uc8
 * implementations as candidates.
 *
 * NOTE(review): the struct definition is not visible here; the positional
 * initializers appear to be { rank, name, flavor, callable, flavor_supported }
 * -- confirm against the regentry declaration. A NULL flavor_supported means
 * the entry is accepted without a CPU check.
 *
 * Each STARCH_MIX_* section restarts its ranks at 0, so presumably exactly one
 * mix is defined per build -- TODO confirm against the build configuration.
 */
starch_magnitude_uc8_aligned_regentry starch_magnitude_uc8_aligned_registry[] = {
#ifdef STARCH_MIX_AARCH64
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "lookup_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "lookup_unroll_4_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "exact_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_exact_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 8, "exact_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_exact_armv8_neon_simd, cpu_supports_armv8_simd },
{ 9, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 10, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_ARM
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "lookup_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 3, "lookup_unroll_4_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 4, "exact_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 5, "lookup_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 6, "lookup_unroll_4_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 7, "exact_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_exact_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 8, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 9, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 10, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_GENERIC
{ 0, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_X86
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "lookup_x86_avx2_aligned", "x86_avx2", starch_magnitude_uc8_aligned_lookup_x86_avx2, cpu_supports_avx2 },
{ 3, "lookup_unroll_4_x86_avx2_aligned", "x86_avx2", starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
{ 4, "exact_x86_avx2_aligned", "x86_avx2", starch_magnitude_uc8_aligned_exact_x86_avx2, cpu_supports_avx2 },
{ 5, "lookup_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_x86_avx2, cpu_supports_avx2 },
{ 6, "exact_x86_avx2", "x86_avx2", starch_magnitude_uc8_exact_x86_avx2, cpu_supports_avx2 },
{ 7, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 8, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_X86 */
/* sentinel: the NULL name marks the end of the registry for every loop over it */
{ 0, NULL, NULL, NULL, NULL }
};
/* dispatcher / registry for mean_power_u16 */
starch_mean_power_u16_regentry * starch_mean_power_u16_select() {
@ -845,11 +845,15 @@ void starch_mean_power_u16_set_wisdom (const char * const * received_wisdom)
starch_mean_power_u16_regentry starch_mean_power_u16_registry[] = {
#ifdef STARCH_MIX_GENERIC
{ 0, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
{ 1, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
{ 2, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_AARCH64
{ 0, "u32_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
{ 2, "float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "u64_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u64_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "neon_float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
{ 6, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_ARM
{ 0, "u32_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_mean_power_u16_u32_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
@ -861,15 +865,11 @@ starch_mean_power_u16_regentry starch_mean_power_u16_registry[] = {
{ 6, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
#ifdef STARCH_MIX_GENERIC
{ 0, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
{ 1, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
{ 2, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
{ 3, "float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "u32_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "u64_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u64_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "neon_float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
#endif /* STARCH_MIX_AARCH64 */
{ 1, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
{ 2, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_X86
{ 0, "u32_x86_avx2", "x86_avx2", starch_mean_power_u16_u32_x86_avx2, cpu_supports_avx2 },
@ -936,11 +936,19 @@ void starch_mean_power_u16_aligned_set_wisdom (const char * const * received_wis
starch_mean_power_u16_aligned_regentry starch_mean_power_u16_aligned_registry[] = {
#ifdef STARCH_MIX_GENERIC
{ 0, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
{ 1, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
{ 2, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_AARCH64
{ 0, "u32_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
{ 2, "float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "u64_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_u64_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "neon_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "u64_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u64_armv8_neon_simd, cpu_supports_armv8_simd },
{ 8, "neon_float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 9, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
{ 10, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_ARM
{ 0, "u32_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
@ -956,19 +964,11 @@ starch_mean_power_u16_aligned_regentry starch_mean_power_u16_aligned_registry[]
{ 10, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
#ifdef STARCH_MIX_GENERIC
{ 0, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
{ 1, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
{ 2, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
{ 3, "float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "u64_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_u64_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "neon_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 8, "u32_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 9, "u64_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u64_armv8_neon_simd, cpu_supports_armv8_simd },
{ 10, "neon_float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
#endif /* STARCH_MIX_AARCH64 */
{ 1, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
{ 2, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_X86
{ 0, "u32_x86_avx2_aligned", "x86_avx2", starch_mean_power_u16_aligned_u32_x86_avx2, cpu_supports_avx2 },
@ -992,14 +992,6 @@ int starch_read_wisdom (const char * path)
return -1;
/* reset all ranks to identify entries not listed in the wisdom file; we'll assign ranks at the end to produce a stable sort */
int rank_magnitude_uc8 = 0;
for (starch_magnitude_uc8_regentry *entry = starch_magnitude_uc8_registry; entry->name; ++entry) {
entry->rank = 0;
}
int rank_magnitude_uc8_aligned = 0;
for (starch_magnitude_uc8_aligned_regentry *entry = starch_magnitude_uc8_aligned_registry; entry->name; ++entry) {
entry->rank = 0;
}
int rank_magnitude_power_uc8 = 0;
for (starch_magnitude_power_uc8_regentry *entry = starch_magnitude_power_uc8_registry; entry->name; ++entry) {
entry->rank = 0;
@ -1024,6 +1016,14 @@ int starch_read_wisdom (const char * path)
for (starch_magnitude_sc16q11_aligned_regentry *entry = starch_magnitude_sc16q11_aligned_registry; entry->name; ++entry) {
entry->rank = 0;
}
int rank_magnitude_uc8 = 0;
for (starch_magnitude_uc8_regentry *entry = starch_magnitude_uc8_registry; entry->name; ++entry) {
entry->rank = 0;
}
int rank_magnitude_uc8_aligned = 0;
for (starch_magnitude_uc8_aligned_regentry *entry = starch_magnitude_uc8_aligned_registry; entry->name; ++entry) {
entry->rank = 0;
}
int rank_mean_power_u16 = 0;
for (starch_mean_power_u16_regentry *entry = starch_mean_power_u16_registry; entry->name; ++entry) {
entry->rank = 0;
@ -1065,24 +1065,6 @@ int starch_read_wisdom (const char * path)
*end = 0;
/* try to find a matching registry entry */
if (strcmp(name, "magnitude_uc8") == 0) {
    /* walk to the first entry whose name equals impl, or to the end-of-registry entry (name == NULL) */
    starch_magnitude_uc8_regentry *candidate = starch_magnitude_uc8_registry;
    while (candidate->name && strcmp(impl, candidate->name) != 0) {
        ++candidate;
    }
    /* only a real match consumes a rank; unknown implementation names are ignored */
    if (candidate->name) {
        candidate->rank = ++rank_magnitude_uc8;
    }
    continue;
}
if (strcmp(name, "magnitude_uc8_aligned") == 0) {
    /* same lookup against the aligned registry */
    starch_magnitude_uc8_aligned_regentry *candidate = starch_magnitude_uc8_aligned_registry;
    while (candidate->name && strcmp(impl, candidate->name) != 0) {
        ++candidate;
    }
    if (candidate->name) {
        candidate->rank = ++rank_magnitude_uc8_aligned;
    }
    continue;
}
if (!strcmp(name, "magnitude_power_uc8")) {
for (starch_magnitude_power_uc8_regentry *entry = starch_magnitude_power_uc8_registry; entry->name; ++entry) {
if (!strcmp(impl, entry->name)) {
@ -1137,6 +1119,24 @@ int starch_read_wisdom (const char * path)
}
continue;
}
if (strcmp(name, "magnitude_uc8") == 0) {
    /* find the registry entry named by impl; stop at the NULL-named end entry if there is none */
    starch_magnitude_uc8_regentry *hit = starch_magnitude_uc8_registry;
    while (hit->name && strcmp(impl, hit->name) != 0) {
        ++hit;
    }
    /* a matched entry takes the next rank for this function; no match leaves all ranks untouched */
    if (hit->name) {
        hit->rank = ++rank_magnitude_uc8;
    }
    continue;
}
if (strcmp(name, "magnitude_uc8_aligned") == 0) {
    /* identical search over the aligned variant's registry */
    starch_magnitude_uc8_aligned_regentry *hit = starch_magnitude_uc8_aligned_registry;
    while (hit->name && strcmp(impl, hit->name) != 0) {
        ++hit;
    }
    if (hit->name) {
        hit->rank = ++rank_magnitude_uc8_aligned;
    }
    continue;
}
if (!strcmp(name, "mean_power_u16")) {
for (starch_mean_power_u16_regentry *entry = starch_mean_power_u16_registry; entry->name; ++entry) {
if (!strcmp(impl, entry->name)) {
@ -1165,28 +1165,6 @@ int starch_read_wisdom (const char * path)
fclose(fp);
/* assign ranks to unmatched items to (stable) sort them last; re-sort everything */
{
    /* entries still at rank 0 receive the next ranks, in their current registry order */
    starch_magnitude_uc8_regentry *tail = starch_magnitude_uc8_registry;
    while (tail->name) {
        if (tail->rank == 0) {
            tail->rank = ++rank_magnitude_uc8;
        }
        ++tail;
    }
    /* tail stopped on the NULL-named end entry, so the pointer difference is the number of real entries */
    qsort(starch_magnitude_uc8_registry, tail - starch_magnitude_uc8_registry, sizeof(starch_magnitude_uc8_regentry), starch_regentry_rank_compare);
    /* route the public pointer through the dispatcher again so the next call re-selects */
    starch_magnitude_uc8 = starch_magnitude_uc8_dispatch;
}
{
    /* same fill-in, sort and reset for the aligned variant */
    starch_magnitude_uc8_aligned_regentry *tail = starch_magnitude_uc8_aligned_registry;
    while (tail->name) {
        if (tail->rank == 0) {
            tail->rank = ++rank_magnitude_uc8_aligned;
        }
        ++tail;
    }
    qsort(starch_magnitude_uc8_aligned_registry, tail - starch_magnitude_uc8_aligned_registry, sizeof(starch_magnitude_uc8_aligned_regentry), starch_regentry_rank_compare);
    starch_magnitude_uc8_aligned = starch_magnitude_uc8_aligned_dispatch;
}
{
starch_magnitude_power_uc8_regentry *entry;
for (entry = starch_magnitude_power_uc8_registry; entry->name; ++entry) {
@ -1253,6 +1231,28 @@ int starch_read_wisdom (const char * path)
/* reset the implementation pointer so the next call will re-select */
starch_magnitude_sc16q11_aligned = starch_magnitude_sc16q11_aligned_dispatch;
}
{
    /* hand out the remaining ranks to entries whose rank is still 0, walking the registry front to back */
    starch_magnitude_uc8_regentry *cursor = starch_magnitude_uc8_registry;
    while (cursor->name) {
        if (cursor->rank == 0) {
            cursor->rank = ++rank_magnitude_uc8;
        }
        ++cursor;
    }
    /* cursor rests on the NULL-named end entry; everything before it is sorted by rank */
    qsort(starch_magnitude_uc8_registry, cursor - starch_magnitude_uc8_registry, sizeof(starch_magnitude_uc8_regentry), starch_regentry_rank_compare);
    /* restore the dispatcher as the call target so the next call re-selects an implementation */
    starch_magnitude_uc8 = starch_magnitude_uc8_dispatch;
}
{
    /* aligned variant: identical sequence against its own registry, counter and dispatcher */
    starch_magnitude_uc8_aligned_regentry *cursor = starch_magnitude_uc8_aligned_registry;
    while (cursor->name) {
        if (cursor->rank == 0) {
            cursor->rank = ++rank_magnitude_uc8_aligned;
        }
        ++cursor;
    }
    qsort(starch_magnitude_uc8_aligned_registry, cursor - starch_magnitude_uc8_aligned_registry, sizeof(starch_magnitude_uc8_aligned_regentry), starch_regentry_rank_compare);
    starch_magnitude_uc8_aligned = starch_magnitude_uc8_aligned_dispatch;
}
{
starch_mean_power_u16_regentry *entry;
for (entry = starch_mean_power_u16_registry; entry->name; ++entry) {

View File

@ -0,0 +1,39 @@
# -*- makefile -*-
# starch generated makefile fragment. do not edit.
#
# This makefile is designed to be included in a surrounding makefile. The including makefile
# should set $(STARCH_COMPILE) to a (partial) command line that provides suitable cflags etc
# and handles the following appended things:
#   * a C source filename to compile to the corresponding .o file
#   * a -o option to specify the output object file
#   * additional command-line arguments to set compile flags as defined in each flavor
#
# Including the makefile fragment provides these variables/rules:
#
#   $(STARCH_CFLAGS): additional cflags that may be used when compiling other code that uses starch.h
#     (not required - if omitted, the only change is that flavor-specific prototypes are unavailable)
#   $(STARCH_OBJS): a list of object files to link to the main binary
#   $(STARCH_BENCHMARK_OBJ): object files providing a standalone benchmarking app (link all of $(STARCH_OBJS) too)
#   explicit build rules for each object file listed in $(STARCH_OBJS)
#
# Note: every recipe line below must begin with a literal tab character.

# Defines STARCH_MIX_AARCH64 for every starch object built by this fragment.
STARCH_CFLAGS := -DSTARCH_MIX_AARCH64

# armv8_neon_simd flavor: the only object here that is built with extra architecture flags.
dsp/generated/flavor.armv8_neon_simd.o: dsp/generated/flavor.armv8_neon_simd.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
	$(STARCH_COMPILE) $(STARCH_CFLAGS) -march=armv8-a+simd -ffast-math dsp/generated/flavor.armv8_neon_simd.c -o dsp/generated/flavor.armv8_neon_simd.o

# generic flavor: no flags beyond $(STARCH_COMPILE) and $(STARCH_CFLAGS).
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
	$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o

# dispatcher object; listed with the same dsp/impl prerequisites as the flavor objects.
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
	$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o

STARCH_OBJS := dsp/generated/flavor.armv8_neon_simd.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o

# benchmark object; its prerequisites are the per-function benchmark sources under dsp/benchmark.
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
	$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o

STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -21,19 +21,19 @@
STARCH_CFLAGS := -DSTARCH_MIX_ARM
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) -march=armv7-a+neon-vfpv4 -mfpu=neon-vfpv4 -ffast-math dsp/generated/flavor.armv7a_neon_vfpv4.c -o dsp/generated/flavor.armv7a_neon_vfpv4.o
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.armv7a_neon_vfpv4.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -21,16 +21,16 @@
STARCH_CFLAGS := -DSTARCH_MIX_GENERIC
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -21,19 +21,19 @@
STARCH_CFLAGS := -DSTARCH_MIX_X86
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) -mavx2 -ffast-math dsp/generated/flavor.x86_avx2.c -o dsp/generated/flavor.x86_avx2.o
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.x86_avx2.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -6,11 +6,12 @@
/* mixes */
/* Generic build, compiler defaults only */
#ifdef STARCH_MIX_GENERIC
/* AARCH64 */
#ifdef STARCH_MIX_AARCH64
#define STARCH_FLAVOR_ARMV8_NEON_SIMD
#define STARCH_FLAVOR_GENERIC
#define STARCH_MIX_ALIGNMENT 1
#endif /* STARCH_MIX_GENERIC */
#define STARCH_MIX_ALIGNMENT 32
#endif /* STARCH_MIX_AARCH64 */
/* ARM */
#ifdef STARCH_MIX_ARM
@ -19,12 +20,11 @@
#define STARCH_MIX_ALIGNMENT 16
#endif /* STARCH_MIX_ARM */
/* AARCH64 */
#ifdef STARCH_MIX_AARCH64
#define STARCH_FLAVOR_ARMV8_NEON_SIMD
/* Generic build, compiler defaults only */
#ifdef STARCH_MIX_GENERIC
#define STARCH_FLAVOR_GENERIC
#define STARCH_MIX_ALIGNMENT 32
#endif /* STARCH_MIX_AARCH64 */
#define STARCH_MIX_ALIGNMENT 1
#endif /* STARCH_MIX_GENERIC */
/* x64 */
#ifdef STARCH_MIX_X86
@ -197,28 +197,16 @@ void starch_mean_power_u16_aligned_set_wisdom( const char * const * received_wis
/* flavors and prototypes */
#ifdef STARCH_FLAVOR_GENERIC
void starch_magnitude_power_uc8_twopass_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_11bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_12bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_mean_power_u16_float_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
#endif /* STARCH_FLAVOR_GENERIC */
int starch_read_wisdom (const char * path);
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
int cpu_supports_armv7_neon_vfpv4 (void);
void starch_mean_power_u16_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_magnitude_power_uc8_twopass_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_twopass_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
@ -227,12 +215,14 @@ void starch_magnitude_power_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
@ -243,28 +233,26 @@ void starch_magnitude_sc16q11_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg
void starch_magnitude_sc16q11_aligned_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_mean_power_u16_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
#endif /* STARCH_FLAVOR_ARMV7A_NEON_VFPV4 */
int starch_read_wisdom (const char * path);
/*
 * Prototypes for the "armv8_neon_simd" flavor. Everything up to the matching
 * #endif is only visible when STARCH_FLAVOR_ARMV8_NEON_SIMD is defined.
 *
 * Every implementation name appears in two forms, with and without an
 * "_aligned_" component; both forms share the same signature.
 * NOTE(review): presumably the aligned form assumes aligned buffers -- the
 * alignment requirement is not stated here, confirm against the definitions.
 *
 * NOTE(review): this region contains two "@ -a,b +c,d @@" hunk-header lines
 * and lists the uc8, mean_power_u16 and sc16 groups twice in different
 * positions. That looks like diff residue (old and new sides interleaved)
 * rather than compilable C -- confirm before building from this text.
 */
#ifdef STARCH_FLAVOR_ARMV8_NEON_SIMD
/* Takes no arguments, returns int.
 * NOTE(review): presumably the runtime support probe for this flavor -- confirm. */
int cpu_supports_armv8_simd (void);
/* mean_power_u16 implementations: (const uint16_t *, unsigned, double *, double *). */
void starch_mean_power_u16_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_neon_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_neon_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
/* magnitude_power_uc8 implementations: (const uc8_t *, uint16_t *, unsigned, double *, double *). */
void starch_magnitude_power_uc8_twopass_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_twopass_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
/* NOTE(review): the next line is a diff hunk header, not C source. */
@ -273,12 +261,14 @@ void starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd ( const uc8_t *
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
/* magnitude_sc16 implementations: (const sc16_t *, uint16_t *, unsigned). */
void starch_magnitude_sc16_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
/* magnitude_uc8 implementations: (const uc8_t *, uint16_t *, unsigned). */
void starch_magnitude_uc8_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
/* magnitude_sc16q11 implementations: (const sc16_t *, uint16_t *, unsigned). */
void starch_magnitude_sc16q11_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
/* NOTE(review): the next line is a diff hunk header, not C source. */
@ -289,38 +279,56 @@ void starch_magnitude_sc16q11_12bit_table_armv8_neon_simd ( const sc16_t * arg0,
void starch_magnitude_sc16q11_aligned_12bit_table_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
/* Second listing of the magnitude_uc8 prototypes (same names and signatures as above). */
void starch_magnitude_uc8_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
/* Second listing of the mean_power_u16 prototypes (same names and signatures as above). */
void starch_mean_power_u16_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_neon_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_neon_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
/* Second listing of the magnitude_sc16 prototypes (same names and signatures as above). */
void starch_magnitude_sc16_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
#endif /* STARCH_FLAVOR_ARMV8_NEON_SIMD */
/* Declaration only: takes a path string and returns int.
 * NOTE(review): presumably loads a wisdom file from `path`; the meaning of
 * the return value is not visible here -- confirm against the definition. */
int starch_read_wisdom (const char * path);
/*
 * Prototypes for the "generic" flavor. Everything up to the matching #endif
 * is only visible when STARCH_FLAVOR_GENERIC is defined.
 *
 * Unlike the armv8_neon_simd and x86_avx2 blocks in this file, no
 * cpu_supports_* function and no "_aligned_" variants are declared here.
 */
#ifdef STARCH_FLAVOR_GENERIC
/* mean_power_u16 implementations: (const uint16_t *, unsigned, double *, double *). */
void starch_mean_power_u16_float_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
/* magnitude_power_uc8 implementations: (const uc8_t *, uint16_t *, unsigned, double *, double *). */
void starch_magnitude_power_uc8_twopass_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
/* magnitude_uc8 implementations: (const uc8_t *, uint16_t *, unsigned). */
void starch_magnitude_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
/* magnitude_sc16q11 implementations: (const sc16_t *, uint16_t *, unsigned). */
void starch_magnitude_sc16q11_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_11bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_12bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
/* magnitude_sc16 implementations: (const sc16_t *, uint16_t *, unsigned). */
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
#endif /* STARCH_FLAVOR_GENERIC */
/* Declaration only: takes a path string and returns int.
 * NOTE(review): presumably loads a wisdom file from `path`; the meaning of
 * the return value is not visible here -- confirm against the definition. */
int starch_read_wisdom (const char * path);
/*
 * Prototypes for the "x86_avx2" flavor. Everything up to the matching #endif
 * is only visible when STARCH_FLAVOR_X86_AVX2 is defined.
 *
 * Every implementation name appears in two forms, with and without an
 * "_aligned_" component; both forms share the same signature.
 * NOTE(review): presumably the aligned form assumes aligned buffers -- the
 * alignment requirement is not stated here, confirm against the definitions.
 *
 * NOTE(review): this region contains an "@ -a,b +c,d @@" hunk-header line
 * and lists the uc8, mean_power_u16 and sc16 groups twice in different
 * positions. That looks like diff residue (old and new sides interleaved)
 * rather than compilable C -- confirm before building from this text.
 */
#ifdef STARCH_FLAVOR_X86_AVX2
/* Takes no arguments, returns int.
 * NOTE(review): presumably the runtime support probe for this flavor -- confirm. */
int cpu_supports_avx2 (void);
/* mean_power_u16 implementations: (const uint16_t *, unsigned, double *, double *). */
void starch_mean_power_u16_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
/* magnitude_power_uc8 implementations: (const uc8_t *, uint16_t *, unsigned, double *, double *). */
void starch_magnitude_power_uc8_twopass_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_twopass_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
/* magnitude_sc16 implementations: (const sc16_t *, uint16_t *, unsigned). */
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
/* magnitude_uc8 implementations: (const uc8_t *, uint16_t *, unsigned). */
void starch_magnitude_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
/* magnitude_sc16q11 implementations: (const sc16_t *, uint16_t *, unsigned). */
void starch_magnitude_sc16q11_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
/* NOTE(review): the next line is a diff hunk header, not C source. */
@ -329,18 +337,10 @@ void starch_magnitude_sc16q11_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16
void starch_magnitude_sc16q11_aligned_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
/* Second listing of the magnitude_uc8 prototypes (same names and signatures as above). */
void starch_magnitude_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
/* Second listing of the mean_power_u16 prototypes (same names and signatures as above). */
void starch_mean_power_u16_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
/* Second listing of the magnitude_sc16 prototypes (same names and signatures as above). */
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
#endif /* STARCH_FLAVOR_X86_AVX2 */
/* Declaration only: takes a path string and returns int.
 * NOTE(review): presumably loads a wisdom file from `path`; the meaning of
 * the return value is not visible here -- confirm against the definition. */
int starch_read_wisdom (const char * path);