Regenerate starch-generated code for starch update & aarch64 wisdom
This commit is contained in:
parent
40c24b1c55
commit
132702cfa7
|
|
@ -112,230 +112,6 @@ static bool starch_benchmark_flavor_in_list(const char *flavor, const starch_ben
|
|||
}
|
||||
|
||||
|
||||
/* prototypes for benchmark helpers provided by user code */
|
||||
void starch_magnitude_uc8_benchmark (void);
|
||||
bool starch_magnitude_uc8_benchmark_verify ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
||||
/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
|
||||
void starch_magnitude_uc8_benchmark(void);
|
||||
|
||||
/*
 * Benchmark one magnitude_uc8 implementation and append its timing to
 * starch_benchmark_results.
 *
 * _entry           registry entry describing the implementation under test
 * arg0, arg1, arg2 forwarded unchanged to the implementation and to
 *                  starch_magnitude_uc8_benchmark_verify()
 *
 * Status and the final timing are written to stderr. The function returns
 * without recording a result when the entry is unsupported, filtered out by
 * the flavor whitelist/blacklist, fails verification, or when only
 * listing/validation was requested. The process exits if the results array
 * cannot be grown.
 */
static void starch_benchmark_one_magnitude_uc8( starch_magnitude_uc8_regentry * _entry, const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
{
    fprintf(stderr, " %-40s ", _entry->name);

    /* test for support */
    if (_entry->flavor_supported && !(_entry->flavor_supported())) {
        fprintf(stderr, "unsupported\n");
        return;
    }

    if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
        fprintf(stderr, "skipped (not whitelisted)\n");
        return;
    }

    if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
        fprintf(stderr, "skipped (blacklisted)\n");
        return;
    }

    if (starch_benchmark_list_only) {
        fprintf(stderr, "supported\n");
        return;
    }

    /* initial warmup */
    for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
        _entry->callable ( arg0, arg1, arg2 );

    /* verify correctness of the output */
    if (! starch_magnitude_uc8_benchmark_verify ( arg0, arg1, arg2 )) {
        fprintf(stderr, "skipped (verification failed)\n");
        starch_benchmark_validation_failed = true;
        return;
    }
    if (starch_benchmark_validate_only) {
        fprintf(stderr, "validation ok\n");
        return;
    }

    /* pre-benchmark, find a loop count that takes at least 100ms */
    starch_benchmark_time _start, _end;
    uint64_t _elapsed = 0;
    uint64_t _loops = 127;
    while (_elapsed < 100000000) {
        _loops *= 2;
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        _elapsed = starch_benchmark_elapsed(&_start, &_end);
    }

    /* real benchmark, run for approx 1 second */
    _loops = _loops * 1000000000 / _elapsed;
    /* If the pre-benchmark pass was slow enough, the scaled count truncates
       to zero; that would time nothing and then divide by zero below. */
    if (_loops == 0)
        _loops = 1;

    _elapsed = 0;
    uint64_t _elapsed_min = UINT64_MAX;
    uint64_t _elapsed_max = 0;
    for (unsigned _iter = 0; _iter < starch_benchmark_iterations; ++_iter) {
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
        if (_elapsed_one < _elapsed_min)
            _elapsed_min = _elapsed_one;
        if (_elapsed_one > _elapsed_max)
            _elapsed_max = _elapsed_one;
        _elapsed += _elapsed_one;
    }

    /* With more than two iterations the fastest and slowest are discarded
       before averaging; otherwise every iteration counts. */
    uint64_t _per_loop;
    if (starch_benchmark_iterations > 2)
        _per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (starch_benchmark_iterations - 2);
    else if (starch_benchmark_iterations > 0)
        _per_loop = _elapsed / _loops / starch_benchmark_iterations;
    else
        _per_loop = 0; /* no timed iterations ran; avoid dividing by zero */

    fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);

    /* grow the results array (64 slots initially, doubling afterwards) */
    if (starch_benchmark_result_count >= starch_benchmark_result_size) {
        if (!starch_benchmark_result_size)
            starch_benchmark_result_size = 64;
        else
            starch_benchmark_result_size *= 2;
        starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
        if (!starch_benchmark_results) {
            fprintf(stderr, "realloc: %s\n", strerror(errno));
            exit(1);
        }
    }

    starch_benchmark_results[starch_benchmark_result_count].name = "magnitude_uc8";
    starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
    starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
    ++starch_benchmark_result_count;
}
|
||||
|
||||
static void starch_benchmark_run_magnitude_uc8( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
|
||||
{
|
||||
for (starch_magnitude_uc8_regentry *_entry = starch_magnitude_uc8_registry; _entry->name; ++_entry) {
|
||||
starch_benchmark_one_magnitude_uc8( _entry, arg0, arg1, arg2 );
|
||||
}
|
||||
}
|
||||
|
||||
/* prototypes for benchmark helpers provided by user code */
|
||||
void starch_magnitude_uc8_aligned_benchmark (void);
|
||||
bool starch_magnitude_uc8_aligned_benchmark_verify ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
||||
/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
|
||||
void starch_magnitude_uc8_aligned_benchmark(void);
|
||||
|
||||
/*
 * Benchmark one magnitude_uc8_aligned implementation and append its timing
 * to starch_benchmark_results.
 *
 * _entry           registry entry describing the implementation under test
 * arg0, arg1, arg2 forwarded unchanged to the implementation and to
 *                  starch_magnitude_uc8_aligned_benchmark_verify()
 *
 * Status and the final timing are written to stderr. The function returns
 * without recording a result when the entry is unsupported, filtered out by
 * the flavor whitelist/blacklist, fails verification, or when only
 * listing/validation was requested. The process exits if the results array
 * cannot be grown.
 */
static void starch_benchmark_one_magnitude_uc8_aligned( starch_magnitude_uc8_aligned_regentry * _entry, const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
{
    fprintf(stderr, " %-40s ", _entry->name);

    /* test for support */
    if (_entry->flavor_supported && !(_entry->flavor_supported())) {
        fprintf(stderr, "unsupported\n");
        return;
    }

    if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
        fprintf(stderr, "skipped (not whitelisted)\n");
        return;
    }

    if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
        fprintf(stderr, "skipped (blacklisted)\n");
        return;
    }

    if (starch_benchmark_list_only) {
        fprintf(stderr, "supported\n");
        return;
    }

    /* initial warmup */
    for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
        _entry->callable ( arg0, arg1, arg2 );

    /* verify correctness of the output */
    if (! starch_magnitude_uc8_aligned_benchmark_verify ( arg0, arg1, arg2 )) {
        fprintf(stderr, "skipped (verification failed)\n");
        starch_benchmark_validation_failed = true;
        return;
    }
    if (starch_benchmark_validate_only) {
        fprintf(stderr, "validation ok\n");
        return;
    }

    /* pre-benchmark, find a loop count that takes at least 100ms */
    starch_benchmark_time _start, _end;
    uint64_t _elapsed = 0;
    uint64_t _loops = 127;
    while (_elapsed < 100000000) {
        _loops *= 2;
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        _elapsed = starch_benchmark_elapsed(&_start, &_end);
    }

    /* real benchmark, run for approx 1 second */
    _loops = _loops * 1000000000 / _elapsed;
    /* If the pre-benchmark pass was slow enough, the scaled count truncates
       to zero; that would time nothing and then divide by zero below. */
    if (_loops == 0)
        _loops = 1;

    _elapsed = 0;
    uint64_t _elapsed_min = UINT64_MAX;
    uint64_t _elapsed_max = 0;
    for (unsigned _iter = 0; _iter < starch_benchmark_iterations; ++_iter) {
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
        if (_elapsed_one < _elapsed_min)
            _elapsed_min = _elapsed_one;
        if (_elapsed_one > _elapsed_max)
            _elapsed_max = _elapsed_one;
        _elapsed += _elapsed_one;
    }

    /* With more than two iterations the fastest and slowest are discarded
       before averaging; otherwise every iteration counts. */
    uint64_t _per_loop;
    if (starch_benchmark_iterations > 2)
        _per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (starch_benchmark_iterations - 2);
    else if (starch_benchmark_iterations > 0)
        _per_loop = _elapsed / _loops / starch_benchmark_iterations;
    else
        _per_loop = 0; /* no timed iterations ran; avoid dividing by zero */

    fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);

    /* grow the results array (64 slots initially, doubling afterwards) */
    if (starch_benchmark_result_count >= starch_benchmark_result_size) {
        if (!starch_benchmark_result_size)
            starch_benchmark_result_size = 64;
        else
            starch_benchmark_result_size *= 2;
        starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
        if (!starch_benchmark_results) {
            fprintf(stderr, "realloc: %s\n", strerror(errno));
            exit(1);
        }
    }

    starch_benchmark_results[starch_benchmark_result_count].name = "magnitude_uc8_aligned";
    starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
    starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
    ++starch_benchmark_result_count;
}
|
||||
|
||||
static void starch_benchmark_run_magnitude_uc8_aligned( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
|
||||
{
|
||||
for (starch_magnitude_uc8_aligned_regentry *_entry = starch_magnitude_uc8_aligned_registry; _entry->name; ++_entry) {
|
||||
starch_benchmark_one_magnitude_uc8_aligned( _entry, arg0, arg1, arg2 );
|
||||
}
|
||||
}
|
||||
|
||||
/* prototypes for benchmark helpers provided by user code */
|
||||
void starch_magnitude_power_uc8_benchmark (void);
|
||||
bool starch_magnitude_power_uc8_benchmark_verify ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
|
|
@ -1008,6 +784,230 @@ static void starch_benchmark_run_magnitude_sc16q11_aligned( const sc16_t * arg0,
|
|||
}
|
||||
}
|
||||
|
||||
/* prototypes for benchmark helpers provided by user code */
|
||||
void starch_magnitude_uc8_benchmark (void);
|
||||
bool starch_magnitude_uc8_benchmark_verify ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
||||
/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
|
||||
void starch_magnitude_uc8_benchmark(void);
|
||||
|
||||
/*
 * Benchmark one magnitude_uc8 implementation and append its timing to
 * starch_benchmark_results.
 *
 * _entry           registry entry describing the implementation under test
 * arg0, arg1, arg2 forwarded unchanged to the implementation and to
 *                  starch_magnitude_uc8_benchmark_verify()
 *
 * Status and the final timing are written to stderr. The function returns
 * without recording a result when the entry is unsupported, filtered out by
 * the flavor whitelist/blacklist, fails verification, or when only
 * listing/validation was requested. The process exits if the results array
 * cannot be grown.
 */
static void starch_benchmark_one_magnitude_uc8( starch_magnitude_uc8_regentry * _entry, const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
{
    fprintf(stderr, " %-40s ", _entry->name);

    /* test for support */
    if (_entry->flavor_supported && !(_entry->flavor_supported())) {
        fprintf(stderr, "unsupported\n");
        return;
    }

    if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
        fprintf(stderr, "skipped (not whitelisted)\n");
        return;
    }

    if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
        fprintf(stderr, "skipped (blacklisted)\n");
        return;
    }

    if (starch_benchmark_list_only) {
        fprintf(stderr, "supported\n");
        return;
    }

    /* initial warmup */
    for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
        _entry->callable ( arg0, arg1, arg2 );

    /* verify correctness of the output */
    if (! starch_magnitude_uc8_benchmark_verify ( arg0, arg1, arg2 )) {
        fprintf(stderr, "skipped (verification failed)\n");
        starch_benchmark_validation_failed = true;
        return;
    }
    if (starch_benchmark_validate_only) {
        fprintf(stderr, "validation ok\n");
        return;
    }

    /* pre-benchmark, find a loop count that takes at least 100ms */
    starch_benchmark_time _start, _end;
    uint64_t _elapsed = 0;
    uint64_t _loops = 127;
    while (_elapsed < 100000000) {
        _loops *= 2;
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        _elapsed = starch_benchmark_elapsed(&_start, &_end);
    }

    /* real benchmark, run for approx 1 second */
    _loops = _loops * 1000000000 / _elapsed;
    /* If the pre-benchmark pass was slow enough, the scaled count truncates
       to zero; that would time nothing and then divide by zero below. */
    if (_loops == 0)
        _loops = 1;

    _elapsed = 0;
    uint64_t _elapsed_min = UINT64_MAX;
    uint64_t _elapsed_max = 0;
    for (unsigned _iter = 0; _iter < starch_benchmark_iterations; ++_iter) {
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
        if (_elapsed_one < _elapsed_min)
            _elapsed_min = _elapsed_one;
        if (_elapsed_one > _elapsed_max)
            _elapsed_max = _elapsed_one;
        _elapsed += _elapsed_one;
    }

    /* With more than two iterations the fastest and slowest are discarded
       before averaging; otherwise every iteration counts. */
    uint64_t _per_loop;
    if (starch_benchmark_iterations > 2)
        _per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (starch_benchmark_iterations - 2);
    else if (starch_benchmark_iterations > 0)
        _per_loop = _elapsed / _loops / starch_benchmark_iterations;
    else
        _per_loop = 0; /* no timed iterations ran; avoid dividing by zero */

    fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);

    /* grow the results array (64 slots initially, doubling afterwards) */
    if (starch_benchmark_result_count >= starch_benchmark_result_size) {
        if (!starch_benchmark_result_size)
            starch_benchmark_result_size = 64;
        else
            starch_benchmark_result_size *= 2;
        starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
        if (!starch_benchmark_results) {
            fprintf(stderr, "realloc: %s\n", strerror(errno));
            exit(1);
        }
    }

    starch_benchmark_results[starch_benchmark_result_count].name = "magnitude_uc8";
    starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
    starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
    ++starch_benchmark_result_count;
}
|
||||
|
||||
static void starch_benchmark_run_magnitude_uc8( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
|
||||
{
|
||||
for (starch_magnitude_uc8_regentry *_entry = starch_magnitude_uc8_registry; _entry->name; ++_entry) {
|
||||
starch_benchmark_one_magnitude_uc8( _entry, arg0, arg1, arg2 );
|
||||
}
|
||||
}
|
||||
|
||||
/* prototypes for benchmark helpers provided by user code */
|
||||
void starch_magnitude_uc8_aligned_benchmark (void);
|
||||
bool starch_magnitude_uc8_aligned_benchmark_verify ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
||||
/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
|
||||
void starch_magnitude_uc8_aligned_benchmark(void);
|
||||
|
||||
/*
 * Benchmark one magnitude_uc8_aligned implementation and append its timing
 * to starch_benchmark_results.
 *
 * _entry           registry entry describing the implementation under test
 * arg0, arg1, arg2 forwarded unchanged to the implementation and to
 *                  starch_magnitude_uc8_aligned_benchmark_verify()
 *
 * Status and the final timing are written to stderr. The function returns
 * without recording a result when the entry is unsupported, filtered out by
 * the flavor whitelist/blacklist, fails verification, or when only
 * listing/validation was requested. The process exits if the results array
 * cannot be grown.
 */
static void starch_benchmark_one_magnitude_uc8_aligned( starch_magnitude_uc8_aligned_regentry * _entry, const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
{
    fprintf(stderr, " %-40s ", _entry->name);

    /* test for support */
    if (_entry->flavor_supported && !(_entry->flavor_supported())) {
        fprintf(stderr, "unsupported\n");
        return;
    }

    if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
        fprintf(stderr, "skipped (not whitelisted)\n");
        return;
    }

    if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
        fprintf(stderr, "skipped (blacklisted)\n");
        return;
    }

    if (starch_benchmark_list_only) {
        fprintf(stderr, "supported\n");
        return;
    }

    /* initial warmup */
    for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
        _entry->callable ( arg0, arg1, arg2 );

    /* verify correctness of the output */
    if (! starch_magnitude_uc8_aligned_benchmark_verify ( arg0, arg1, arg2 )) {
        fprintf(stderr, "skipped (verification failed)\n");
        starch_benchmark_validation_failed = true;
        return;
    }
    if (starch_benchmark_validate_only) {
        fprintf(stderr, "validation ok\n");
        return;
    }

    /* pre-benchmark, find a loop count that takes at least 100ms */
    starch_benchmark_time _start, _end;
    uint64_t _elapsed = 0;
    uint64_t _loops = 127;
    while (_elapsed < 100000000) {
        _loops *= 2;
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        _elapsed = starch_benchmark_elapsed(&_start, &_end);
    }

    /* real benchmark, run for approx 1 second */
    _loops = _loops * 1000000000 / _elapsed;
    /* If the pre-benchmark pass was slow enough, the scaled count truncates
       to zero; that would time nothing and then divide by zero below. */
    if (_loops == 0)
        _loops = 1;

    _elapsed = 0;
    uint64_t _elapsed_min = UINT64_MAX;
    uint64_t _elapsed_max = 0;
    for (unsigned _iter = 0; _iter < starch_benchmark_iterations; ++_iter) {
        starch_benchmark_get_time(&_start);
        for (uint64_t _loop = 0; _loop < _loops; ++_loop)
            _entry->callable ( arg0, arg1, arg2 );
        starch_benchmark_get_time(&_end);
        uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
        if (_elapsed_one < _elapsed_min)
            _elapsed_min = _elapsed_one;
        if (_elapsed_one > _elapsed_max)
            _elapsed_max = _elapsed_one;
        _elapsed += _elapsed_one;
    }

    /* With more than two iterations the fastest and slowest are discarded
       before averaging; otherwise every iteration counts. */
    uint64_t _per_loop;
    if (starch_benchmark_iterations > 2)
        _per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (starch_benchmark_iterations - 2);
    else if (starch_benchmark_iterations > 0)
        _per_loop = _elapsed / _loops / starch_benchmark_iterations;
    else
        _per_loop = 0; /* no timed iterations ran; avoid dividing by zero */

    fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);

    /* grow the results array (64 slots initially, doubling afterwards) */
    if (starch_benchmark_result_count >= starch_benchmark_result_size) {
        if (!starch_benchmark_result_size)
            starch_benchmark_result_size = 64;
        else
            starch_benchmark_result_size *= 2;
        starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
        if (!starch_benchmark_results) {
            fprintf(stderr, "realloc: %s\n", strerror(errno));
            exit(1);
        }
    }

    starch_benchmark_results[starch_benchmark_result_count].name = "magnitude_uc8_aligned";
    starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
    starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
    ++starch_benchmark_result_count;
}
|
||||
|
||||
static void starch_benchmark_run_magnitude_uc8_aligned( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 )
|
||||
{
|
||||
for (starch_magnitude_uc8_aligned_regentry *_entry = starch_magnitude_uc8_aligned_registry; _entry->name; ++_entry) {
|
||||
starch_benchmark_one_magnitude_uc8_aligned( _entry, arg0, arg1, arg2 );
|
||||
}
|
||||
}
|
||||
|
||||
/* prototypes for benchmark helpers provided by user code */
|
||||
void starch_mean_power_u16_benchmark (void);
|
||||
bool starch_mean_power_u16_benchmark_verify ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
|
|
@ -1280,16 +1280,6 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
|
|||
#include "../benchmark/magnitude_uc8_benchmark.c"
|
||||
#include "../benchmark/mean_power_u16_benchmark.c"
|
||||
|
||||
/* Write the magnitude_uc8 section banner to stderr, then run the
   benchmark driver for that function. */
static void starch_benchmark_all_magnitude_uc8(void)
{
    fputs("==== magnitude_uc8 ===\n", stderr);
    starch_magnitude_uc8_benchmark ();
}
|
||||
/* Write the magnitude_uc8_aligned section banner to stderr, then run the
   benchmark driver for that function. */
static void starch_benchmark_all_magnitude_uc8_aligned(void)
{
    fputs("==== magnitude_uc8_aligned ===\n", stderr);
    starch_magnitude_uc8_aligned_benchmark ();
}
|
||||
static void starch_benchmark_all_magnitude_power_uc8(void)
|
||||
{
|
||||
fprintf(stderr, "==== magnitude_power_uc8 ===\n");
|
||||
|
|
@ -1320,6 +1310,16 @@ static void starch_benchmark_all_magnitude_sc16q11_aligned(void)
|
|||
fprintf(stderr, "==== magnitude_sc16q11_aligned ===\n");
|
||||
starch_magnitude_sc16q11_aligned_benchmark ();
|
||||
}
|
||||
/* Write the magnitude_uc8 section banner to stderr, then run the
   benchmark driver for that function. */
static void starch_benchmark_all_magnitude_uc8(void)
{
    fputs("==== magnitude_uc8 ===\n", stderr);
    starch_magnitude_uc8_benchmark ();
}
|
||||
/* Write the magnitude_uc8_aligned section banner to stderr, then run the
   benchmark driver for that function. */
static void starch_benchmark_all_magnitude_uc8_aligned(void)
{
    fputs("==== magnitude_uc8_aligned ===\n", stderr);
    starch_magnitude_uc8_aligned_benchmark ();
}
|
||||
static void starch_benchmark_all_mean_power_u16(void)
|
||||
{
|
||||
fprintf(stderr, "==== mean_power_u16 ===\n");
|
||||
|
|
@ -1369,28 +1369,28 @@ static void starch_benchmark_usage(const char *argv0)
|
|||
" (default: benchmark all functions)\n"
|
||||
"\n"
|
||||
"Supported flavors: "
|
||||
#ifdef STARCH_FLAVOR_GENERIC
|
||||
"generic "
|
||||
#endif
|
||||
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
|
||||
"armv7a_neon_vfpv4 "
|
||||
#endif
|
||||
#ifdef STARCH_FLAVOR_ARMV8_NEON_SIMD
|
||||
"armv8_neon_simd "
|
||||
#endif
|
||||
#ifdef STARCH_FLAVOR_GENERIC
|
||||
"generic "
|
||||
#endif
|
||||
#ifdef STARCH_FLAVOR_X86_AVX2
|
||||
"x86_avx2 "
|
||||
#endif
|
||||
"\n"
|
||||
"Supported functions: "
|
||||
"magnitude_uc8 "
|
||||
"magnitude_uc8_aligned "
|
||||
"magnitude_power_uc8 "
|
||||
"magnitude_power_uc8_aligned "
|
||||
"magnitude_sc16 "
|
||||
"magnitude_sc16_aligned "
|
||||
"magnitude_sc16q11 "
|
||||
"magnitude_sc16q11_aligned "
|
||||
"magnitude_uc8 "
|
||||
"magnitude_uc8_aligned "
|
||||
"mean_power_u16 "
|
||||
"mean_power_u16_aligned "
|
||||
"\n", argv0);
|
||||
|
|
@ -1478,16 +1478,6 @@ int main(int argc, char **argv)
|
|||
}
|
||||
|
||||
for (int i = optind; i < argc; ++i) {
|
||||
if (!strcmp(argv[i], "magnitude_uc8")) {
|
||||
specific = 1;
|
||||
starch_benchmark_all_magnitude_uc8();
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "magnitude_uc8_aligned")) {
|
||||
specific = 1;
|
||||
starch_benchmark_all_magnitude_uc8_aligned();
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "magnitude_power_uc8")) {
|
||||
specific = 1;
|
||||
starch_benchmark_all_magnitude_power_uc8();
|
||||
|
|
@ -1518,6 +1508,16 @@ int main(int argc, char **argv)
|
|||
starch_benchmark_all_magnitude_sc16q11_aligned();
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "magnitude_uc8")) {
|
||||
specific = 1;
|
||||
starch_benchmark_all_magnitude_uc8();
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "magnitude_uc8_aligned")) {
|
||||
specific = 1;
|
||||
starch_benchmark_all_magnitude_uc8_aligned();
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "mean_power_u16")) {
|
||||
specific = 1;
|
||||
starch_benchmark_all_mean_power_u16();
|
||||
|
|
@ -1534,14 +1534,14 @@ int main(int argc, char **argv)
|
|||
}
|
||||
|
||||
if (!specific) {
|
||||
starch_benchmark_all_magnitude_uc8();
|
||||
starch_benchmark_all_magnitude_uc8_aligned();
|
||||
starch_benchmark_all_magnitude_power_uc8();
|
||||
starch_benchmark_all_magnitude_power_uc8_aligned();
|
||||
starch_benchmark_all_magnitude_sc16();
|
||||
starch_benchmark_all_magnitude_sc16_aligned();
|
||||
starch_benchmark_all_magnitude_sc16q11();
|
||||
starch_benchmark_all_magnitude_sc16q11_aligned();
|
||||
starch_benchmark_all_magnitude_uc8();
|
||||
starch_benchmark_all_magnitude_uc8_aligned();
|
||||
starch_benchmark_all_mean_power_u16();
|
||||
starch_benchmark_all_mean_power_u16_aligned();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,199 +19,6 @@ static int starch_regentry_rank_compare (const void *l, const void *r)
|
|||
return left->rank - right->rank;
|
||||
}
|
||||
|
||||
/* dispatcher / registry for magnitude_uc8 */
|
||||
|
||||
starch_magnitude_uc8_regentry * starch_magnitude_uc8_select() {
|
||||
for (starch_magnitude_uc8_regentry *entry = starch_magnitude_uc8_registry;
|
||||
entry->name;
|
||||
++entry)
|
||||
{
|
||||
if (entry->flavor_supported && !(entry->flavor_supported()))
|
||||
continue;
|
||||
return entry;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void starch_magnitude_uc8_dispatch ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 ) {
|
||||
starch_magnitude_uc8_regentry *entry = starch_magnitude_uc8_select();
|
||||
if (!entry)
|
||||
abort();
|
||||
|
||||
starch_magnitude_uc8 = entry->callable;
|
||||
starch_magnitude_uc8 ( arg0, arg1, arg2 );
|
||||
}
|
||||
|
||||
/* Function pointer through which magnitude_uc8 is called. It starts out
   pointing at the dispatcher, which replaces it with the selected
   implementation the first time it runs. */
starch_magnitude_uc8_ptr starch_magnitude_uc8 = starch_magnitude_uc8_dispatch;
|
||||
|
||||
void starch_magnitude_uc8_set_wisdom (const char * const * received_wisdom)
|
||||
{
|
||||
/* re-rank the registry based on received wisdom */
|
||||
starch_magnitude_uc8_regentry *entry;
|
||||
for (entry = starch_magnitude_uc8_registry; entry->name; ++entry) {
|
||||
const char * const *search;
|
||||
for (search = received_wisdom; *search; ++search) {
|
||||
if (!strcmp(*search, entry->name)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (*search) {
|
||||
/* matches an entry in the wisdom list, order by position in the list */
|
||||
entry->rank = search - received_wisdom;
|
||||
} else {
|
||||
/* no match, rank after all possible matches, retaining existing order */
|
||||
entry->rank = (search - received_wisdom) + (entry - starch_magnitude_uc8_registry);
|
||||
}
|
||||
}
|
||||
|
||||
/* re-sort based on the new ranking */
|
||||
qsort(starch_magnitude_uc8_registry, entry - starch_magnitude_uc8_registry, sizeof(starch_magnitude_uc8_regentry), starch_regentry_rank_compare);
|
||||
|
||||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_magnitude_uc8 = starch_magnitude_uc8_dispatch;
|
||||
}
|
||||
|
||||
/*
 * Registry of magnitude_uc8 implementations. Exactly which rows are
 * compiled in depends on which STARCH_MIX_* macro is defined. Rows are
 * scanned in array order by the select function and re-sorted in place by
 * the set_wisdom function.
 * NOTE(review): the columns look like { rank, name, flavor, callable,
 * flavor_supported } -- confirm against the regentry declaration.
 */
starch_magnitude_uc8_regentry starch_magnitude_uc8_registry[] = {
|
||||
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 2, "lookup_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 3, "lookup_unroll_4_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 4, "exact_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_exact_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 5, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 6, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
{ 3, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "exact_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_exact_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 2, "lookup_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_x86_avx2, cpu_supports_avx2 },
|
||||
{ 3, "exact_x86_avx2", "x86_avx2", starch_magnitude_uc8_exact_x86_avx2, cpu_supports_avx2 },
|
||||
{ 4, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 5, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_X86 */
|
||||
/* terminator: every loop over this registry stops at the first NULL name */
{ 0, NULL, NULL, NULL, NULL }
|
||||
};
|
||||
|
||||
/* dispatcher / registry for magnitude_uc8_aligned */
|
||||
|
||||
starch_magnitude_uc8_aligned_regentry * starch_magnitude_uc8_aligned_select() {
|
||||
for (starch_magnitude_uc8_aligned_regentry *entry = starch_magnitude_uc8_aligned_registry;
|
||||
entry->name;
|
||||
++entry)
|
||||
{
|
||||
if (entry->flavor_supported && !(entry->flavor_supported()))
|
||||
continue;
|
||||
return entry;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void starch_magnitude_uc8_aligned_dispatch ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 ) {
|
||||
starch_magnitude_uc8_aligned_regentry *entry = starch_magnitude_uc8_aligned_select();
|
||||
if (!entry)
|
||||
abort();
|
||||
|
||||
starch_magnitude_uc8_aligned = entry->callable;
|
||||
starch_magnitude_uc8_aligned ( arg0, arg1, arg2 );
|
||||
}
|
||||
|
||||
/* Function pointer through which magnitude_uc8_aligned is called. It starts
   out pointing at the dispatcher, which replaces it with the selected
   implementation the first time it runs. */
starch_magnitude_uc8_aligned_ptr starch_magnitude_uc8_aligned = starch_magnitude_uc8_aligned_dispatch;
|
||||
|
||||
void starch_magnitude_uc8_aligned_set_wisdom (const char * const * received_wisdom)
|
||||
{
|
||||
/* re-rank the registry based on received wisdom */
|
||||
starch_magnitude_uc8_aligned_regentry *entry;
|
||||
for (entry = starch_magnitude_uc8_aligned_registry; entry->name; ++entry) {
|
||||
const char * const *search;
|
||||
for (search = received_wisdom; *search; ++search) {
|
||||
if (!strcmp(*search, entry->name)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (*search) {
|
||||
/* matches an entry in the wisdom list, order by position in the list */
|
||||
entry->rank = search - received_wisdom;
|
||||
} else {
|
||||
/* no match, rank after all possible matches, retaining existing order */
|
||||
entry->rank = (search - received_wisdom) + (entry - starch_magnitude_uc8_aligned_registry);
|
||||
}
|
||||
}
|
||||
|
||||
/* re-sort based on the new ranking */
|
||||
qsort(starch_magnitude_uc8_aligned_registry, entry - starch_magnitude_uc8_aligned_registry, sizeof(starch_magnitude_uc8_aligned_regentry), starch_regentry_rank_compare);
|
||||
|
||||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_magnitude_uc8_aligned = starch_magnitude_uc8_aligned_dispatch;
|
||||
}
|
||||
|
||||
/*
 * Candidate implementations for magnitude_uc8_aligned.
 *
 * Which rows are compiled in depends on which STARCH_MIX_* macro is defined.
 * The table lists both the "_aligned" variants and the plain (non-aligned)
 * implementations. starch_magnitude_uc8_aligned_set_wisdom() rewrites the
 * rank of every row and re-sorts this array in place, so it is deliberately
 * not const.
 *
 * NOTE(review): the initializers are positional; from the way entries are
 * used elsewhere the columns appear to be
 *   { rank, name, flavor, callable, flavor_supported }
 * with the final all-NULL row acting as the terminator (loops over the
 * registry stop at a NULL name) -- confirm against the regentry declaration.
 */
starch_magnitude_uc8_aligned_regentry starch_magnitude_uc8_aligned_registry[] = {
|
||||
|
||||
/* rows used when STARCH_MIX_GENERIC is defined */
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
/* rows used when STARCH_MIX_ARM is defined */
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 2, "lookup_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 3, "lookup_unroll_4_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 4, "exact_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 5, "lookup_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 6, "lookup_unroll_4_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 7, "exact_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_exact_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 8, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 9, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 10, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
/* rows used when STARCH_MIX_AARCH64 is defined; the generic rows come first here */
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
{ 3, "lookup_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "lookup_unroll_4_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "exact_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_exact_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 7, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 8, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 9, "exact_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_exact_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 10, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
/* rows used when STARCH_MIX_X86 is defined */
#ifdef STARCH_MIX_X86
|
||||
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 2, "lookup_x86_avx2_aligned", "x86_avx2", starch_magnitude_uc8_aligned_lookup_x86_avx2, cpu_supports_avx2 },
|
||||
{ 3, "lookup_unroll_4_x86_avx2_aligned", "x86_avx2", starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
|
||||
{ 4, "exact_x86_avx2_aligned", "x86_avx2", starch_magnitude_uc8_aligned_exact_x86_avx2, cpu_supports_avx2 },
|
||||
{ 5, "lookup_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_x86_avx2, cpu_supports_avx2 },
|
||||
{ 6, "exact_x86_avx2", "x86_avx2", starch_magnitude_uc8_exact_x86_avx2, cpu_supports_avx2 },
|
||||
{ 7, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 8, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_X86 */
|
||||
/* terminator row: always present, regardless of which mix is compiled in */
{ 0, NULL, NULL, NULL, NULL }
|
||||
};
|
||||
|
||||
/* dispatcher / registry for magnitude_power_uc8 */
|
||||
|
||||
starch_magnitude_power_uc8_regentry * starch_magnitude_power_uc8_select() {
|
||||
|
|
@ -266,11 +73,15 @@ void starch_magnitude_power_uc8_set_wisdom (const char * const * received_wisdom
|
|||
|
||||
starch_magnitude_power_uc8_regentry starch_magnitude_power_uc8_registry[] = {
|
||||
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
{ 1, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
{ 2, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 2, "twopass_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
{ 6, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_power_uc8_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
|
|
@ -282,15 +93,11 @@ starch_magnitude_power_uc8_regentry starch_magnitude_power_uc8_registry[] = {
|
|||
{ 6, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "twopass_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
{ 5, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
{ 6, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
{ 1, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
{ 2, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "twopass_x86_avx2", "x86_avx2", starch_magnitude_power_uc8_twopass_x86_avx2, cpu_supports_avx2 },
|
||||
|
|
@ -357,11 +164,19 @@ void starch_magnitude_power_uc8_aligned_set_wisdom (const char * const * receive
|
|||
|
||||
starch_magnitude_power_uc8_aligned_regentry starch_magnitude_power_uc8_aligned_registry[] = {
|
||||
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
{ 1, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
{ 2, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 2, "twopass_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "lookup_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "lookup_unroll_4_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "twopass_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 7, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 8, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 9, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
{ 10, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
|
|
@ -377,19 +192,11 @@ starch_magnitude_power_uc8_aligned_regentry starch_magnitude_power_uc8_aligned_r
|
|||
{ 10, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "twopass_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "lookup_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "lookup_unroll_4_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "twopass_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 7, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 8, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
{ 9, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
{ 10, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
{ 1, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
{ 2, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "twopass_x86_avx2_aligned", "x86_avx2", starch_magnitude_power_uc8_aligned_twopass_x86_avx2, cpu_supports_avx2 },
|
||||
|
|
@ -459,10 +266,13 @@ void starch_magnitude_sc16_set_wisdom (const char * const * received_wisdom)
|
|||
|
||||
starch_magnitude_sc16_regentry starch_magnitude_sc16_registry[] = {
|
||||
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
{ 2, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
|
|
@ -472,13 +282,10 @@ starch_magnitude_sc16_regentry starch_magnitude_sc16_registry[] = {
|
|||
{ 4, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
{ 4, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "exact_float_x86_avx2", "x86_avx2", starch_magnitude_sc16_exact_float_x86_avx2, cpu_supports_avx2 },
|
||||
|
|
@ -543,10 +350,16 @@ void starch_magnitude_sc16_aligned_set_wisdom (const char * const * received_wis
|
|||
|
||||
starch_magnitude_sc16_aligned_regentry starch_magnitude_sc16_aligned_registry[] = {
|
||||
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
{ 2, "exact_u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "exact_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 7, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
|
|
@ -559,16 +372,10 @@ starch_magnitude_sc16_aligned_regentry starch_magnitude_sc16_aligned_registry[]
|
|||
{ 7, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "exact_u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "exact_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
{ 7, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "exact_float_x86_avx2_aligned", "x86_avx2", starch_magnitude_sc16_aligned_exact_float_x86_avx2, cpu_supports_avx2 },
|
||||
|
|
@ -635,12 +442,17 @@ void starch_magnitude_sc16q11_set_wisdom (const char * const * received_wisdom)
|
|||
|
||||
starch_magnitude_sc16q11_regentry starch_magnitude_sc16q11_registry[] = {
|
||||
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
|
||||
{ 2, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
|
||||
{ 3, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
{ 2, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "11bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "12bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
|
||||
{ 7, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
|
||||
{ 8, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_sc16q11_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
|
|
@ -654,17 +466,12 @@ starch_magnitude_sc16q11_regentry starch_magnitude_sc16q11_registry[] = {
|
|||
{ 8, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "11bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "12bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
|
||||
{ 6, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
{ 7, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
|
||||
{ 8, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
|
||||
{ 2, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
|
||||
{ 3, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "exact_float_x86_avx2", "x86_avx2", starch_magnitude_sc16q11_exact_float_x86_avx2, cpu_supports_avx2 },
|
||||
|
|
@ -733,12 +540,22 @@ void starch_magnitude_sc16q11_aligned_set_wisdom (const char * const * received_
|
|||
|
||||
starch_magnitude_sc16q11_aligned_regentry starch_magnitude_sc16q11_aligned_registry[] = {
|
||||
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
|
||||
{ 2, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
|
||||
{ 3, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
{ 2, "exact_u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "exact_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "11bit_table_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "12bit_table_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 7, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 8, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 9, "11bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 10, "12bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 11, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
|
||||
{ 12, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
|
||||
{ 13, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
|
|
@ -757,22 +574,12 @@ starch_magnitude_sc16q11_aligned_regentry starch_magnitude_sc16q11_aligned_regis
|
|||
{ 13, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "exact_u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "exact_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "11bit_table_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "12bit_table_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 7, "11bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 8, "12bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 9, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 10, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
|
||||
{ 11, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
{ 12, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
|
||||
{ 13, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
{ 1, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
|
||||
{ 2, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
|
||||
{ 3, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "exact_float_x86_avx2_aligned", "x86_avx2", starch_magnitude_sc16q11_aligned_exact_float_x86_avx2, cpu_supports_avx2 },
|
||||
|
|
@ -791,6 +598,199 @@ starch_magnitude_sc16q11_aligned_regentry starch_magnitude_sc16q11_aligned_regis
|
|||
{ 0, NULL, NULL, NULL, NULL }
|
||||
};
|
||||
|
||||
/* dispatcher / registry for magnitude_uc8 */
|
||||
|
||||
starch_magnitude_uc8_regentry * starch_magnitude_uc8_select() {
|
||||
for (starch_magnitude_uc8_regentry *entry = starch_magnitude_uc8_registry;
|
||||
entry->name;
|
||||
++entry)
|
||||
{
|
||||
if (entry->flavor_supported && !(entry->flavor_supported()))
|
||||
continue;
|
||||
return entry;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void starch_magnitude_uc8_dispatch ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 ) {
|
||||
starch_magnitude_uc8_regentry *entry = starch_magnitude_uc8_select();
|
||||
if (!entry)
|
||||
abort();
|
||||
|
||||
starch_magnitude_uc8 = entry->callable;
|
||||
starch_magnitude_uc8 ( arg0, arg1, arg2 );
|
||||
}
|
||||
|
||||
/* Entry point for magnitude_uc8. It starts out pointing at the dispatcher,
 * which overwrites it with the selected implementation's callable the first
 * time it is invoked; starch_magnitude_uc8_set_wisdom() points it back at
 * the dispatcher. */
starch_magnitude_uc8_ptr starch_magnitude_uc8 = starch_magnitude_uc8_dispatch;
|
||||
|
||||
void starch_magnitude_uc8_set_wisdom (const char * const * received_wisdom)
|
||||
{
|
||||
/* re-rank the registry based on received wisdom */
|
||||
starch_magnitude_uc8_regentry *entry;
|
||||
for (entry = starch_magnitude_uc8_registry; entry->name; ++entry) {
|
||||
const char * const *search;
|
||||
for (search = received_wisdom; *search; ++search) {
|
||||
if (!strcmp(*search, entry->name)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (*search) {
|
||||
/* matches an entry in the wisdom list, order by position in the list */
|
||||
entry->rank = search - received_wisdom;
|
||||
} else {
|
||||
/* no match, rank after all possible matches, retaining existing order */
|
||||
entry->rank = (search - received_wisdom) + (entry - starch_magnitude_uc8_registry);
|
||||
}
|
||||
}
|
||||
|
||||
/* re-sort based on the new ranking */
|
||||
qsort(starch_magnitude_uc8_registry, entry - starch_magnitude_uc8_registry, sizeof(starch_magnitude_uc8_regentry), starch_regentry_rank_compare);
|
||||
|
||||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_magnitude_uc8 = starch_magnitude_uc8_dispatch;
|
||||
}
|
||||
|
||||
/*
 * Candidate implementations for magnitude_uc8.
 *
 * Which rows are compiled in depends on which STARCH_MIX_* macro is defined.
 * starch_magnitude_uc8_select() returns the first row, in array order, that
 * has no support check or whose support check passes;
 * starch_magnitude_uc8_set_wisdom() rewrites the rank of every row and
 * re-sorts this array in place, so it is deliberately not const.
 *
 * NOTE(review): the initializers are positional; from the way entries are
 * used elsewhere the columns appear to be
 *   { rank, name, flavor, callable, flavor_supported }
 * with the final all-NULL row acting as the terminator (loops over the
 * registry stop at a NULL name) -- confirm against the regentry declaration.
 */
starch_magnitude_uc8_regentry starch_magnitude_uc8_registry[] = {
|
||||
|
||||
/* rows used when STARCH_MIX_AARCH64 is defined */
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 2, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "exact_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_exact_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 6, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
/* rows used when STARCH_MIX_ARM is defined */
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 2, "lookup_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 3, "lookup_unroll_4_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 4, "exact_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_exact_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 5, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 6, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
/* rows used when STARCH_MIX_GENERIC is defined */
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
/* rows used when STARCH_MIX_X86 is defined */
#ifdef STARCH_MIX_X86
|
||||
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 2, "lookup_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_x86_avx2, cpu_supports_avx2 },
|
||||
{ 3, "exact_x86_avx2", "x86_avx2", starch_magnitude_uc8_exact_x86_avx2, cpu_supports_avx2 },
|
||||
{ 4, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 5, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_X86 */
|
||||
/* terminator row: always present, regardless of which mix is compiled in */
{ 0, NULL, NULL, NULL, NULL }
|
||||
};
|
||||
|
||||
/* dispatcher / registry for magnitude_uc8_aligned */
|
||||
|
||||
starch_magnitude_uc8_aligned_regentry * starch_magnitude_uc8_aligned_select() {
|
||||
for (starch_magnitude_uc8_aligned_regentry *entry = starch_magnitude_uc8_aligned_registry;
|
||||
entry->name;
|
||||
++entry)
|
||||
{
|
||||
if (entry->flavor_supported && !(entry->flavor_supported()))
|
||||
continue;
|
||||
return entry;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void starch_magnitude_uc8_aligned_dispatch ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 ) {
|
||||
starch_magnitude_uc8_aligned_regentry *entry = starch_magnitude_uc8_aligned_select();
|
||||
if (!entry)
|
||||
abort();
|
||||
|
||||
starch_magnitude_uc8_aligned = entry->callable;
|
||||
starch_magnitude_uc8_aligned ( arg0, arg1, arg2 );
|
||||
}
|
||||
|
||||
/* Call-through pointer for magnitude_uc8_aligned. It is initialized to the
 * dispatcher, which replaces it with the selected implementation's callable
 * the first time it runs. */
starch_magnitude_uc8_aligned_ptr starch_magnitude_uc8_aligned = starch_magnitude_uc8_aligned_dispatch;
|
||||
|
||||
void starch_magnitude_uc8_aligned_set_wisdom (const char * const * received_wisdom)
|
||||
{
|
||||
/* re-rank the registry based on received wisdom */
|
||||
starch_magnitude_uc8_aligned_regentry *entry;
|
||||
for (entry = starch_magnitude_uc8_aligned_registry; entry->name; ++entry) {
|
||||
const char * const *search;
|
||||
for (search = received_wisdom; *search; ++search) {
|
||||
if (!strcmp(*search, entry->name)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (*search) {
|
||||
/* matches an entry in the wisdom list, order by position in the list */
|
||||
entry->rank = search - received_wisdom;
|
||||
} else {
|
||||
/* no match, rank after all possible matches, retaining existing order */
|
||||
entry->rank = (search - received_wisdom) + (entry - starch_magnitude_uc8_aligned_registry);
|
||||
}
|
||||
}
|
||||
|
||||
/* re-sort based on the new ranking */
|
||||
qsort(starch_magnitude_uc8_aligned_registry, entry - starch_magnitude_uc8_aligned_registry, sizeof(starch_magnitude_uc8_aligned_regentry), starch_regentry_rank_compare);
|
||||
|
||||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_magnitude_uc8_aligned = starch_magnitude_uc8_aligned_dispatch;
|
||||
}
|
||||
|
||||
/*
 * Registry of magnitude_uc8_aligned implementations, one group per build mix.
 *
 * Only the group(s) whose STARCH_MIX_* macro is defined are compiled in.
 * Each group lists both "_aligned" variants and the plain magnitude_uc8
 * implementations. Each initializer is positional; presumably the fields are
 * { rank, name, flavor, callable, flavor_supported } -- confirm against the
 * starch_magnitude_uc8_aligned_regentry declaration.
 *
 * Array order matters: starch_magnitude_uc8_aligned_select() returns the
 * first entry that is supported. The array is intentionally not const
 * because the wisdom code rewrites the rank fields and qsort()s it in place.
 * The all-NULL sentinel must stay last; loops stop on a NULL name.
 */
starch_magnitude_uc8_aligned_regentry starch_magnitude_uc8_aligned_registry[] = {

#ifdef STARCH_MIX_AARCH64
    { 0, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
    { 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
    { 2, "lookup_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
    { 3, "lookup_unroll_4_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
    { 4, "exact_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_exact_armv8_neon_simd, cpu_supports_armv8_simd },
    { 5, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
    { 6, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
    { 7, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
    { 8, "exact_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_exact_armv8_neon_simd, cpu_supports_armv8_simd },
    { 9, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
    { 10, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */

#ifdef STARCH_MIX_ARM
    { 0, "neon_vrsqrte_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
    { 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
    { 2, "lookup_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
    { 3, "lookup_unroll_4_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
    { 4, "exact_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
    { 5, "lookup_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
    { 6, "lookup_unroll_4_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
    { 7, "exact_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_exact_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
    { 8, "neon_vrsqrte_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
    { 9, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
    { 10, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_ARM */

#ifdef STARCH_MIX_GENERIC
    { 0, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
    { 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
    { 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_GENERIC */

#ifdef STARCH_MIX_X86
    { 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
    { 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
    { 2, "lookup_x86_avx2_aligned", "x86_avx2", starch_magnitude_uc8_aligned_lookup_x86_avx2, cpu_supports_avx2 },
    { 3, "lookup_unroll_4_x86_avx2_aligned", "x86_avx2", starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
    { 4, "exact_x86_avx2_aligned", "x86_avx2", starch_magnitude_uc8_aligned_exact_x86_avx2, cpu_supports_avx2 },
    { 5, "lookup_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_x86_avx2, cpu_supports_avx2 },
    { 6, "exact_x86_avx2", "x86_avx2", starch_magnitude_uc8_exact_x86_avx2, cpu_supports_avx2 },
    { 7, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
    { 8, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_X86 */

    /* sentinel: NULL name marks the end of the registry */
    { 0, NULL, NULL, NULL, NULL }
};
|
||||
|
||||
/* dispatcher / registry for mean_power_u16 */
|
||||
|
||||
starch_mean_power_u16_regentry * starch_mean_power_u16_select() {
|
||||
|
|
@ -845,11 +845,15 @@ void starch_mean_power_u16_set_wisdom (const char * const * received_wisdom)
|
|||
|
||||
starch_mean_power_u16_regentry starch_mean_power_u16_registry[] = {
|
||||
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
{ 1, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
|
||||
{ 2, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "u32_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
|
||||
{ 2, "float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "u64_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u64_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "neon_float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
|
||||
{ 6, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "u32_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_mean_power_u16_u32_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
|
|
@ -861,15 +865,11 @@ starch_mean_power_u16_regentry starch_mean_power_u16_registry[] = {
|
|||
{ 6, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
{ 1, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
|
||||
{ 2, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
|
||||
{ 3, "float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "u32_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "u64_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u64_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "neon_float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
{ 1, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
|
||||
{ 2, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "u32_x86_avx2", "x86_avx2", starch_mean_power_u16_u32_x86_avx2, cpu_supports_avx2 },
|
||||
|
|
@ -936,11 +936,19 @@ void starch_mean_power_u16_aligned_set_wisdom (const char * const * received_wis
|
|||
|
||||
starch_mean_power_u16_aligned_regentry starch_mean_power_u16_aligned_registry[] = {
|
||||
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
{ 1, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
|
||||
{ 2, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "u32_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
|
||||
{ 2, "float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "u64_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_u64_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "neon_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 7, "u64_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u64_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 8, "neon_float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 9, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
|
||||
{ 10, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "u32_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
|
|
@ -956,19 +964,11 @@ starch_mean_power_u16_aligned_regentry starch_mean_power_u16_aligned_registry[]
|
|||
{ 10, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
{ 1, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
|
||||
{ 2, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
|
||||
{ 3, "float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "u64_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_u64_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "neon_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 7, "float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 8, "u32_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 9, "u64_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u64_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 10, "neon_float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
{ 1, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
|
||||
{ 2, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "u32_x86_avx2_aligned", "x86_avx2", starch_mean_power_u16_aligned_u32_x86_avx2, cpu_supports_avx2 },
|
||||
|
|
@ -992,14 +992,6 @@ int starch_read_wisdom (const char * path)
|
|||
return -1;
|
||||
|
||||
/* reset all ranks to identify entries not listed in the wisdom file; we'll assign ranks at the end to produce a stable sort */
|
||||
int rank_magnitude_uc8 = 0;
|
||||
for (starch_magnitude_uc8_regentry *entry = starch_magnitude_uc8_registry; entry->name; ++entry) {
|
||||
entry->rank = 0;
|
||||
}
|
||||
int rank_magnitude_uc8_aligned = 0;
|
||||
for (starch_magnitude_uc8_aligned_regentry *entry = starch_magnitude_uc8_aligned_registry; entry->name; ++entry) {
|
||||
entry->rank = 0;
|
||||
}
|
||||
int rank_magnitude_power_uc8 = 0;
|
||||
for (starch_magnitude_power_uc8_regentry *entry = starch_magnitude_power_uc8_registry; entry->name; ++entry) {
|
||||
entry->rank = 0;
|
||||
|
|
@ -1024,6 +1016,14 @@ int starch_read_wisdom (const char * path)
|
|||
for (starch_magnitude_sc16q11_aligned_regentry *entry = starch_magnitude_sc16q11_aligned_registry; entry->name; ++entry) {
|
||||
entry->rank = 0;
|
||||
}
|
||||
int rank_magnitude_uc8 = 0;
|
||||
for (starch_magnitude_uc8_regentry *entry = starch_magnitude_uc8_registry; entry->name; ++entry) {
|
||||
entry->rank = 0;
|
||||
}
|
||||
int rank_magnitude_uc8_aligned = 0;
|
||||
for (starch_magnitude_uc8_aligned_regentry *entry = starch_magnitude_uc8_aligned_registry; entry->name; ++entry) {
|
||||
entry->rank = 0;
|
||||
}
|
||||
int rank_mean_power_u16 = 0;
|
||||
for (starch_mean_power_u16_regentry *entry = starch_mean_power_u16_registry; entry->name; ++entry) {
|
||||
entry->rank = 0;
|
||||
|
|
@ -1065,24 +1065,6 @@ int starch_read_wisdom (const char * path)
|
|||
*end = 0;
|
||||
|
||||
/* try to find a matching registry entry */
|
||||
if (!strcmp(name, "magnitude_uc8")) {
|
||||
for (starch_magnitude_uc8_regentry *entry = starch_magnitude_uc8_registry; entry->name; ++entry) {
|
||||
if (!strcmp(impl, entry->name)) {
|
||||
entry->rank = ++rank_magnitude_uc8;
|
||||
break;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(name, "magnitude_uc8_aligned")) {
|
||||
for (starch_magnitude_uc8_aligned_regentry *entry = starch_magnitude_uc8_aligned_registry; entry->name; ++entry) {
|
||||
if (!strcmp(impl, entry->name)) {
|
||||
entry->rank = ++rank_magnitude_uc8_aligned;
|
||||
break;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(name, "magnitude_power_uc8")) {
|
||||
for (starch_magnitude_power_uc8_regentry *entry = starch_magnitude_power_uc8_registry; entry->name; ++entry) {
|
||||
if (!strcmp(impl, entry->name)) {
|
||||
|
|
@ -1137,6 +1119,24 @@ int starch_read_wisdom (const char * path)
|
|||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(name, "magnitude_uc8")) {
|
||||
for (starch_magnitude_uc8_regentry *entry = starch_magnitude_uc8_registry; entry->name; ++entry) {
|
||||
if (!strcmp(impl, entry->name)) {
|
||||
entry->rank = ++rank_magnitude_uc8;
|
||||
break;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(name, "magnitude_uc8_aligned")) {
|
||||
for (starch_magnitude_uc8_aligned_regentry *entry = starch_magnitude_uc8_aligned_registry; entry->name; ++entry) {
|
||||
if (!strcmp(impl, entry->name)) {
|
||||
entry->rank = ++rank_magnitude_uc8_aligned;
|
||||
break;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(name, "mean_power_u16")) {
|
||||
for (starch_mean_power_u16_regentry *entry = starch_mean_power_u16_registry; entry->name; ++entry) {
|
||||
if (!strcmp(impl, entry->name)) {
|
||||
|
|
@ -1165,28 +1165,6 @@ int starch_read_wisdom (const char * path)
|
|||
fclose(fp);
|
||||
|
||||
/* assign ranks to unmatched items to (stable) sort them last; re-sort everything */
|
||||
{
|
||||
starch_magnitude_uc8_regentry *entry;
|
||||
for (entry = starch_magnitude_uc8_registry; entry->name; ++entry) {
|
||||
if (!entry->rank)
|
||||
entry->rank = ++rank_magnitude_uc8;
|
||||
}
|
||||
qsort(starch_magnitude_uc8_registry, entry - starch_magnitude_uc8_registry, sizeof(starch_magnitude_uc8_regentry), starch_regentry_rank_compare);
|
||||
|
||||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_magnitude_uc8 = starch_magnitude_uc8_dispatch;
|
||||
}
|
||||
{
|
||||
starch_magnitude_uc8_aligned_regentry *entry;
|
||||
for (entry = starch_magnitude_uc8_aligned_registry; entry->name; ++entry) {
|
||||
if (!entry->rank)
|
||||
entry->rank = ++rank_magnitude_uc8_aligned;
|
||||
}
|
||||
qsort(starch_magnitude_uc8_aligned_registry, entry - starch_magnitude_uc8_aligned_registry, sizeof(starch_magnitude_uc8_aligned_regentry), starch_regentry_rank_compare);
|
||||
|
||||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_magnitude_uc8_aligned = starch_magnitude_uc8_aligned_dispatch;
|
||||
}
|
||||
{
|
||||
starch_magnitude_power_uc8_regentry *entry;
|
||||
for (entry = starch_magnitude_power_uc8_registry; entry->name; ++entry) {
|
||||
|
|
@ -1253,6 +1231,28 @@ int starch_read_wisdom (const char * path)
|
|||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_magnitude_sc16q11_aligned = starch_magnitude_sc16q11_aligned_dispatch;
|
||||
}
|
||||
{
|
||||
starch_magnitude_uc8_regentry *entry;
|
||||
for (entry = starch_magnitude_uc8_registry; entry->name; ++entry) {
|
||||
if (!entry->rank)
|
||||
entry->rank = ++rank_magnitude_uc8;
|
||||
}
|
||||
qsort(starch_magnitude_uc8_registry, entry - starch_magnitude_uc8_registry, sizeof(starch_magnitude_uc8_regentry), starch_regentry_rank_compare);
|
||||
|
||||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_magnitude_uc8 = starch_magnitude_uc8_dispatch;
|
||||
}
|
||||
{
|
||||
starch_magnitude_uc8_aligned_regentry *entry;
|
||||
for (entry = starch_magnitude_uc8_aligned_registry; entry->name; ++entry) {
|
||||
if (!entry->rank)
|
||||
entry->rank = ++rank_magnitude_uc8_aligned;
|
||||
}
|
||||
qsort(starch_magnitude_uc8_aligned_registry, entry - starch_magnitude_uc8_aligned_registry, sizeof(starch_magnitude_uc8_aligned_regentry), starch_regentry_rank_compare);
|
||||
|
||||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_magnitude_uc8_aligned = starch_magnitude_uc8_aligned_dispatch;
|
||||
}
|
||||
{
|
||||
starch_mean_power_u16_regentry *entry;
|
||||
for (entry = starch_mean_power_u16_registry; entry->name; ++entry) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,39 @@
|
|||
# -*- makefile -*-
|
||||
|
||||
|
||||
# starch generated makefile fragment. do not edit.
|
||||
#
|
||||
# This makefile is designed to be included in a surrounding makefile. The including makefile
|
||||
# should set $(STARCH_COMPILE) to a (partial) command line that provides suitable cflags etc
|
||||
# and handles the following appended things:
|
||||
# * a C source filename to compile to the corresponding .o file
|
||||
# * a -o option to specify the output object file
|
||||
# * additional command-line arguments to set compile flags as defined in each flavor
|
||||
#
|
||||
# Including the makefile fragment provides these variables/rules:
|
||||
#
|
||||
# $(STARCH_CFLAGS): additional cflags that may be used when compiling other code that uses starch.h
|
||||
# (not required - if omitted, the only change is that flavor-specific prototypes are unavailable)
|
||||
# $(STARCH_OBJS): a list of object files to link to the main binary
|
||||
# $(STARCH_BENCHMARK_OBJ): object files providing a standalone benchmarking app (link all of $(STARCH_OBJS) too)
|
||||
# explicit build rules for each object file listed in $(STARCH_OBJS)
|
||||
|
||||
STARCH_CFLAGS := -DSTARCH_MIX_AARCH64
|
||||
|
||||
|
||||
dsp/generated/flavor.armv8_neon_simd.o: dsp/generated/flavor.armv8_neon_simd.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) -march=armv8-a+simd -ffast-math dsp/generated/flavor.armv8_neon_simd.c -o dsp/generated/flavor.armv8_neon_simd.o
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.armv8_neon_simd.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
@ -21,19 +21,19 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_ARM
|
||||
|
||||
|
||||
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) -march=armv7-a+neon-vfpv4 -mfpu=neon-vfpv4 -ffast-math dsp/generated/flavor.armv7a_neon_vfpv4.c -o dsp/generated/flavor.armv7a_neon_vfpv4.o
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.armv7a_neon_vfpv4.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -21,16 +21,16 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_GENERIC
|
||||
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -21,19 +21,19 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_X86
|
||||
|
||||
|
||||
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) -mavx2 -ffast-math dsp/generated/flavor.x86_avx2.c -o dsp/generated/flavor.x86_avx2.o
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.x86_avx2.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -6,11 +6,12 @@
|
|||
|
||||
/* mixes */
|
||||
|
||||
/* Generic build, compiler defaults only */
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
/* AARCH64 */
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
#define STARCH_FLAVOR_ARMV8_NEON_SIMD
|
||||
#define STARCH_FLAVOR_GENERIC
|
||||
#define STARCH_MIX_ALIGNMENT 1
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
#define STARCH_MIX_ALIGNMENT 32
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
/* ARM */
|
||||
#ifdef STARCH_MIX_ARM
|
||||
|
|
@ -19,12 +20,11 @@
|
|||
#define STARCH_MIX_ALIGNMENT 16
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
/* AARCH64 */
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
#define STARCH_FLAVOR_ARMV8_NEON_SIMD
|
||||
/* Generic build, compiler defaults only */
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
#define STARCH_FLAVOR_GENERIC
|
||||
#define STARCH_MIX_ALIGNMENT 32
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
#define STARCH_MIX_ALIGNMENT 1
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
/* x64 */
|
||||
#ifdef STARCH_MIX_X86
|
||||
|
|
@ -197,28 +197,16 @@ void starch_mean_power_u16_aligned_set_wisdom( const char * const * received_wis
|
|||
|
||||
/* flavors and prototypes */
|
||||
|
||||
#ifdef STARCH_FLAVOR_GENERIC
|
||||
void starch_magnitude_power_uc8_twopass_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_11bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_12bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_mean_power_u16_float_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
#endif /* STARCH_FLAVOR_GENERIC */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
||||
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
|
||||
int cpu_supports_armv7_neon_vfpv4 (void);
|
||||
void starch_mean_power_u16_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_power_uc8_twopass_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_twopass_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
|
|
@ -227,12 +215,14 @@ void starch_magnitude_power_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t
|
|||
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
|
@ -243,28 +233,26 @@ void starch_magnitude_sc16q11_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg
|
|||
void starch_magnitude_sc16q11_aligned_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_mean_power_u16_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
#endif /* STARCH_FLAVOR_ARMV7A_NEON_VFPV4 */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
||||
#ifdef STARCH_FLAVOR_ARMV8_NEON_SIMD
|
||||
int cpu_supports_armv8_simd (void);
|
||||
void starch_mean_power_u16_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_neon_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_neon_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_power_uc8_twopass_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_twopass_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
|
|
@ -273,12 +261,14 @@ void starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd ( const uc8_t *
|
|||
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_sc16_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
|
@ -289,38 +279,56 @@ void starch_magnitude_sc16q11_12bit_table_armv8_neon_simd ( const sc16_t * arg0,
|
|||
void starch_magnitude_sc16q11_aligned_12bit_table_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_mean_power_u16_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_neon_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_neon_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_sc16_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
#endif /* STARCH_FLAVOR_ARMV8_NEON_SIMD */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
||||
#ifdef STARCH_FLAVOR_GENERIC
|
||||
void starch_mean_power_u16_float_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_power_uc8_twopass_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_11bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_12bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
#endif /* STARCH_FLAVOR_GENERIC */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
||||
#ifdef STARCH_FLAVOR_X86_AVX2
|
||||
int cpu_supports_avx2 (void);
|
||||
void starch_mean_power_u16_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_power_uc8_twopass_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_twopass_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
|
@ -329,18 +337,10 @@ void starch_magnitude_sc16q11_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16
|
|||
void starch_magnitude_sc16q11_aligned_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_mean_power_u16_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
#endif /* STARCH_FLAVOR_X86_AVX2 */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
|
|
|||
Loading…
Reference in New Issue