Use a starch implementation for the burst-detection sample counting loop.
This commit is contained in:
parent
1cb4284e6c
commit
2480e5169c
|
|
@ -292,12 +292,8 @@ static void adaptive_burst_scan_windows(uint16_t *buf, unsigned windows)
|
|||
// return the number of loud samples seen
|
||||
static inline unsigned adaptive_burst_count_samples(uint16_t *buf, unsigned n)
|
||||
{
|
||||
unsigned counter = 0;
|
||||
while (n--) {
|
||||
if (buf[0] > 46395) // -3dBFS
|
||||
++counter;
|
||||
++buf;
|
||||
}
|
||||
unsigned counter;
|
||||
starch_count_above_u16(buf, n, 46395 /* -3dBFS */, &counter);
|
||||
return counter;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,39 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
void STARCH_BENCHMARK(count_above_u16) (void)
|
||||
{
|
||||
uint16_t *in = NULL;
|
||||
const unsigned len = 96; /* Typical use is with short burst windows (40us) */
|
||||
const unsigned threshold = 46395; /* -3dBFS */
|
||||
|
||||
if (!(in = STARCH_BENCHMARK_ALLOC(len, uint16_t))) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
srand(1);
|
||||
for (unsigned i = 0; i < len; ++i) {
|
||||
in[i] = rand() % 65536;
|
||||
}
|
||||
|
||||
unsigned count;
|
||||
STARCH_BENCHMARK_RUN( count_above_u16, in, len, threshold, &count );
|
||||
|
||||
done:
|
||||
STARCH_BENCHMARK_FREE(in);
|
||||
}
|
||||
|
||||
bool STARCH_BENCHMARK_VERIFY(count_above_u16) (const uint16_t *in, unsigned len, uint16_t threshold, unsigned *out_count)
|
||||
{
|
||||
unsigned expected = 0;
|
||||
for (unsigned i = 0; i < len; ++i) {
|
||||
if (in[i] >= threshold)
|
||||
++expected;
|
||||
}
|
||||
|
||||
if (expected != *out_count) {
|
||||
fprintf(stderr, "verification failed: expected count %u, got count %u\n", expected, *out_count);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -112,6 +112,230 @@ static bool starch_benchmark_flavor_in_list(const char *flavor, const starch_ben
|
|||
}
|
||||
|
||||
|
||||
/* prototypes for benchmark helpers provided by user code */
|
||||
void starch_count_above_u16_benchmark (void);
|
||||
bool starch_count_above_u16_benchmark_verify ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
|
||||
/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
|
||||
void starch_count_above_u16_benchmark(void);
|
||||
|
||||
static void starch_benchmark_one_count_above_u16( starch_count_above_u16_regentry * _entry, const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 )
|
||||
{
|
||||
fprintf(stderr, " %-40s ", _entry->name);
|
||||
|
||||
/* test for support */
|
||||
if (_entry->flavor_supported && !(_entry->flavor_supported())) {
|
||||
fprintf(stderr, "unsupported\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
|
||||
fprintf(stderr, "skipped (not whitelisted)\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
|
||||
fprintf(stderr, "skipped (blacklisted)\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (starch_benchmark_list_only) {
|
||||
fprintf(stderr, "supported\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* initial warmup */
|
||||
for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
|
||||
_entry->callable ( arg0, arg1, arg2, arg3 );
|
||||
|
||||
/* verify correctness of the output */
|
||||
if (! starch_count_above_u16_benchmark_verify ( arg0, arg1, arg2, arg3 )) {
|
||||
fprintf(stderr, "skipped (verification failed)\n");
|
||||
starch_benchmark_validation_failed = true;
|
||||
return;
|
||||
}
|
||||
if (starch_benchmark_validate_only) {
|
||||
fprintf(stderr, "validation ok\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* pre-benchmark, find a loop count that takes at least 100ms */
|
||||
starch_benchmark_time _start, _end;
|
||||
uint64_t _elapsed = 0;
|
||||
uint64_t _loops = 127;
|
||||
while (_elapsed < 100000000) {
|
||||
_loops *= 2;
|
||||
starch_benchmark_get_time(&_start);
|
||||
for (uint64_t _loop = 0; _loop < _loops; ++_loop)
|
||||
_entry->callable ( arg0, arg1, arg2, arg3 );
|
||||
starch_benchmark_get_time(&_end);
|
||||
_elapsed = starch_benchmark_elapsed(&_start, &_end);
|
||||
}
|
||||
|
||||
/* real benchmark, run for approx 1 second */
|
||||
_loops = _loops * 1000000000 / _elapsed;
|
||||
|
||||
_elapsed = 0;
|
||||
uint64_t _elapsed_min = UINT64_MAX;
|
||||
uint64_t _elapsed_max = 0;
|
||||
for (unsigned _iter = 0; _iter < starch_benchmark_iterations; ++_iter) {
|
||||
starch_benchmark_get_time(&_start);
|
||||
for (uint64_t _loop = 0; _loop < _loops; ++_loop)
|
||||
_entry->callable ( arg0, arg1, arg2, arg3 );
|
||||
starch_benchmark_get_time(&_end);
|
||||
uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
|
||||
if (_elapsed_one < _elapsed_min)
|
||||
_elapsed_min = _elapsed_one;
|
||||
if (_elapsed_one > _elapsed_max)
|
||||
_elapsed_max = _elapsed_one;
|
||||
_elapsed += _elapsed_one;
|
||||
}
|
||||
|
||||
uint64_t _per_loop;
|
||||
if (starch_benchmark_iterations > 2)
|
||||
_per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (starch_benchmark_iterations - 2);
|
||||
else
|
||||
_per_loop = _elapsed / _loops / starch_benchmark_iterations;
|
||||
|
||||
fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);
|
||||
|
||||
if (starch_benchmark_result_count >= starch_benchmark_result_size) {
|
||||
if (!starch_benchmark_result_size)
|
||||
starch_benchmark_result_size = 64;
|
||||
else
|
||||
starch_benchmark_result_size *= 2;
|
||||
starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
|
||||
if (!starch_benchmark_results) {
|
||||
fprintf(stderr, "realloc: %s\n", strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
starch_benchmark_results[starch_benchmark_result_count].name = "count_above_u16";
|
||||
starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
|
||||
starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
|
||||
++starch_benchmark_result_count;
|
||||
}
|
||||
|
||||
static void starch_benchmark_run_count_above_u16( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 )
|
||||
{
|
||||
for (starch_count_above_u16_regentry *_entry = starch_count_above_u16_registry; _entry->name; ++_entry) {
|
||||
starch_benchmark_one_count_above_u16( _entry, arg0, arg1, arg2, arg3 );
|
||||
}
|
||||
}
|
||||
|
||||
/* prototypes for benchmark helpers provided by user code */
|
||||
void starch_count_above_u16_aligned_benchmark (void);
|
||||
bool starch_count_above_u16_aligned_benchmark_verify ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
|
||||
/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
|
||||
void starch_count_above_u16_aligned_benchmark(void);
|
||||
|
||||
static void starch_benchmark_one_count_above_u16_aligned( starch_count_above_u16_aligned_regentry * _entry, const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 )
|
||||
{
|
||||
fprintf(stderr, " %-40s ", _entry->name);
|
||||
|
||||
/* test for support */
|
||||
if (_entry->flavor_supported && !(_entry->flavor_supported())) {
|
||||
fprintf(stderr, "unsupported\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
|
||||
fprintf(stderr, "skipped (not whitelisted)\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
|
||||
fprintf(stderr, "skipped (blacklisted)\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (starch_benchmark_list_only) {
|
||||
fprintf(stderr, "supported\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* initial warmup */
|
||||
for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
|
||||
_entry->callable ( arg0, arg1, arg2, arg3 );
|
||||
|
||||
/* verify correctness of the output */
|
||||
if (! starch_count_above_u16_aligned_benchmark_verify ( arg0, arg1, arg2, arg3 )) {
|
||||
fprintf(stderr, "skipped (verification failed)\n");
|
||||
starch_benchmark_validation_failed = true;
|
||||
return;
|
||||
}
|
||||
if (starch_benchmark_validate_only) {
|
||||
fprintf(stderr, "validation ok\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* pre-benchmark, find a loop count that takes at least 100ms */
|
||||
starch_benchmark_time _start, _end;
|
||||
uint64_t _elapsed = 0;
|
||||
uint64_t _loops = 127;
|
||||
while (_elapsed < 100000000) {
|
||||
_loops *= 2;
|
||||
starch_benchmark_get_time(&_start);
|
||||
for (uint64_t _loop = 0; _loop < _loops; ++_loop)
|
||||
_entry->callable ( arg0, arg1, arg2, arg3 );
|
||||
starch_benchmark_get_time(&_end);
|
||||
_elapsed = starch_benchmark_elapsed(&_start, &_end);
|
||||
}
|
||||
|
||||
/* real benchmark, run for approx 1 second */
|
||||
_loops = _loops * 1000000000 / _elapsed;
|
||||
|
||||
_elapsed = 0;
|
||||
uint64_t _elapsed_min = UINT64_MAX;
|
||||
uint64_t _elapsed_max = 0;
|
||||
for (unsigned _iter = 0; _iter < starch_benchmark_iterations; ++_iter) {
|
||||
starch_benchmark_get_time(&_start);
|
||||
for (uint64_t _loop = 0; _loop < _loops; ++_loop)
|
||||
_entry->callable ( arg0, arg1, arg2, arg3 );
|
||||
starch_benchmark_get_time(&_end);
|
||||
uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
|
||||
if (_elapsed_one < _elapsed_min)
|
||||
_elapsed_min = _elapsed_one;
|
||||
if (_elapsed_one > _elapsed_max)
|
||||
_elapsed_max = _elapsed_one;
|
||||
_elapsed += _elapsed_one;
|
||||
}
|
||||
|
||||
uint64_t _per_loop;
|
||||
if (starch_benchmark_iterations > 2)
|
||||
_per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (starch_benchmark_iterations - 2);
|
||||
else
|
||||
_per_loop = _elapsed / _loops / starch_benchmark_iterations;
|
||||
|
||||
fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);
|
||||
|
||||
if (starch_benchmark_result_count >= starch_benchmark_result_size) {
|
||||
if (!starch_benchmark_result_size)
|
||||
starch_benchmark_result_size = 64;
|
||||
else
|
||||
starch_benchmark_result_size *= 2;
|
||||
starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
|
||||
if (!starch_benchmark_results) {
|
||||
fprintf(stderr, "realloc: %s\n", strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
starch_benchmark_results[starch_benchmark_result_count].name = "count_above_u16_aligned";
|
||||
starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
|
||||
starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
|
||||
++starch_benchmark_result_count;
|
||||
}
|
||||
|
||||
static void starch_benchmark_run_count_above_u16_aligned( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 )
|
||||
{
|
||||
for (starch_count_above_u16_aligned_regentry *_entry = starch_count_above_u16_aligned_registry; _entry->name; ++_entry) {
|
||||
starch_benchmark_one_count_above_u16_aligned( _entry, arg0, arg1, arg2, arg3 );
|
||||
}
|
||||
}
|
||||
|
||||
/* prototypes for benchmark helpers provided by user code */
|
||||
void starch_magnitude_power_uc8_benchmark (void);
|
||||
bool starch_magnitude_power_uc8_benchmark_verify ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
|
|
@ -1246,6 +1470,7 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
|
|||
#define STARCH_BENCHMARK_ALLOC(_count, _type) ((_type *) starch_benchmark_aligned_alloc(1, alignof(_type), (_count) * sizeof(_type)))
|
||||
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)
|
||||
|
||||
#include "../benchmark/count_above_u16_benchmark.c"
|
||||
#include "../benchmark/magnitude_power_uc8_benchmark.c"
|
||||
#include "../benchmark/magnitude_sc16_benchmark.c"
|
||||
#include "../benchmark/magnitude_sc16q11_benchmark.c"
|
||||
|
|
@ -1274,12 +1499,23 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
|
|||
#define STARCH_BENCHMARK_ALLOC(_count, _type) ((_type *) starch_benchmark_aligned_alloc(STARCH_MIX_ALIGNMENT, alignof(_type), (_count) * sizeof(_type)))
|
||||
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)
|
||||
|
||||
#include "../benchmark/count_above_u16_benchmark.c"
|
||||
#include "../benchmark/magnitude_power_uc8_benchmark.c"
|
||||
#include "../benchmark/magnitude_sc16_benchmark.c"
|
||||
#include "../benchmark/magnitude_sc16q11_benchmark.c"
|
||||
#include "../benchmark/magnitude_uc8_benchmark.c"
|
||||
#include "../benchmark/mean_power_u16_benchmark.c"
|
||||
|
||||
static void starch_benchmark_all_count_above_u16(void)
|
||||
{
|
||||
fprintf(stderr, "==== count_above_u16 ===\n");
|
||||
starch_count_above_u16_benchmark ();
|
||||
}
|
||||
static void starch_benchmark_all_count_above_u16_aligned(void)
|
||||
{
|
||||
fprintf(stderr, "==== count_above_u16_aligned ===\n");
|
||||
starch_count_above_u16_aligned_benchmark ();
|
||||
}
|
||||
static void starch_benchmark_all_magnitude_power_uc8(void)
|
||||
{
|
||||
fprintf(stderr, "==== magnitude_power_uc8 ===\n");
|
||||
|
|
@ -1383,6 +1619,8 @@ static void starch_benchmark_usage(const char *argv0)
|
|||
#endif
|
||||
"\n"
|
||||
"Supported functions: "
|
||||
"count_above_u16 "
|
||||
"count_above_u16_aligned "
|
||||
"magnitude_power_uc8 "
|
||||
"magnitude_power_uc8_aligned "
|
||||
"magnitude_sc16 "
|
||||
|
|
@ -1478,6 +1716,16 @@ int main(int argc, char **argv)
|
|||
}
|
||||
|
||||
for (int i = optind; i < argc; ++i) {
|
||||
if (!strcmp(argv[i], "count_above_u16")) {
|
||||
specific = 1;
|
||||
starch_benchmark_all_count_above_u16();
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "count_above_u16_aligned")) {
|
||||
specific = 1;
|
||||
starch_benchmark_all_count_above_u16_aligned();
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(argv[i], "magnitude_power_uc8")) {
|
||||
specific = 1;
|
||||
starch_benchmark_all_magnitude_power_uc8();
|
||||
|
|
@ -1534,6 +1782,8 @@ int main(int argc, char **argv)
|
|||
}
|
||||
|
||||
if (!specific) {
|
||||
starch_benchmark_all_count_above_u16();
|
||||
starch_benchmark_all_count_above_u16_aligned();
|
||||
starch_benchmark_all_magnitude_power_uc8();
|
||||
starch_benchmark_all_magnitude_power_uc8_aligned();
|
||||
starch_benchmark_all_magnitude_sc16();
|
||||
|
|
|
|||
|
|
@ -19,6 +19,165 @@ static int starch_regentry_rank_compare (const void *l, const void *r)
|
|||
return left->rank - right->rank;
|
||||
}
|
||||
|
||||
/* dispatcher / registry for count_above_u16 */
|
||||
|
||||
starch_count_above_u16_regentry * starch_count_above_u16_select() {
|
||||
for (starch_count_above_u16_regentry *entry = starch_count_above_u16_registry;
|
||||
entry->name;
|
||||
++entry)
|
||||
{
|
||||
if (entry->flavor_supported && !(entry->flavor_supported()))
|
||||
continue;
|
||||
return entry;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void starch_count_above_u16_dispatch ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 ) {
|
||||
starch_count_above_u16_regentry *entry = starch_count_above_u16_select();
|
||||
if (!entry)
|
||||
abort();
|
||||
|
||||
starch_count_above_u16 = entry->callable;
|
||||
starch_count_above_u16 ( arg0, arg1, arg2, arg3 );
|
||||
}
|
||||
|
||||
starch_count_above_u16_ptr starch_count_above_u16 = starch_count_above_u16_dispatch;
|
||||
|
||||
void starch_count_above_u16_set_wisdom (const char * const * received_wisdom)
|
||||
{
|
||||
/* re-rank the registry based on received wisdom */
|
||||
starch_count_above_u16_regentry *entry;
|
||||
for (entry = starch_count_above_u16_registry; entry->name; ++entry) {
|
||||
const char * const *search;
|
||||
for (search = received_wisdom; *search; ++search) {
|
||||
if (!strcmp(*search, entry->name)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (*search) {
|
||||
/* matches an entry in the wisdom list, order by position in the list */
|
||||
entry->rank = search - received_wisdom;
|
||||
} else {
|
||||
/* no match, rank after all possible matches, retaining existing order */
|
||||
entry->rank = (search - received_wisdom) + (entry - starch_count_above_u16_registry);
|
||||
}
|
||||
}
|
||||
|
||||
/* re-sort based on the new ranking */
|
||||
qsort(starch_count_above_u16_registry, entry - starch_count_above_u16_registry, sizeof(starch_count_above_u16_regentry), starch_regentry_rank_compare);
|
||||
|
||||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_count_above_u16 = starch_count_above_u16_dispatch;
|
||||
}
|
||||
|
||||
starch_count_above_u16_regentry starch_count_above_u16_registry[] = {
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "generic_armv8_neon_simd", "armv8_neon_simd", starch_count_above_u16_generic_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "neon_armv8_neon_simd", "armv8_neon_simd", starch_count_above_u16_neon_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "neon_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_count_above_u16_neon_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 1, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
|
||||
{ 2, "generic_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_count_above_u16_generic_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "generic_x86_avx2", "x86_avx2", starch_count_above_u16_generic_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
|
||||
#endif /* STARCH_MIX_X86 */
|
||||
{ 0, NULL, NULL, NULL, NULL }
|
||||
};
|
||||
|
||||
/* dispatcher / registry for count_above_u16_aligned */
|
||||
|
||||
starch_count_above_u16_aligned_regentry * starch_count_above_u16_aligned_select() {
|
||||
for (starch_count_above_u16_aligned_regentry *entry = starch_count_above_u16_aligned_registry;
|
||||
entry->name;
|
||||
++entry)
|
||||
{
|
||||
if (entry->flavor_supported && !(entry->flavor_supported()))
|
||||
continue;
|
||||
return entry;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void starch_count_above_u16_aligned_dispatch ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 ) {
|
||||
starch_count_above_u16_aligned_regentry *entry = starch_count_above_u16_aligned_select();
|
||||
if (!entry)
|
||||
abort();
|
||||
|
||||
starch_count_above_u16_aligned = entry->callable;
|
||||
starch_count_above_u16_aligned ( arg0, arg1, arg2, arg3 );
|
||||
}
|
||||
|
||||
starch_count_above_u16_aligned_ptr starch_count_above_u16_aligned = starch_count_above_u16_aligned_dispatch;
|
||||
|
||||
void starch_count_above_u16_aligned_set_wisdom (const char * const * received_wisdom)
|
||||
{
|
||||
/* re-rank the registry based on received wisdom */
|
||||
starch_count_above_u16_aligned_regentry *entry;
|
||||
for (entry = starch_count_above_u16_aligned_registry; entry->name; ++entry) {
|
||||
const char * const *search;
|
||||
for (search = received_wisdom; *search; ++search) {
|
||||
if (!strcmp(*search, entry->name)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (*search) {
|
||||
/* matches an entry in the wisdom list, order by position in the list */
|
||||
entry->rank = search - received_wisdom;
|
||||
} else {
|
||||
/* no match, rank after all possible matches, retaining existing order */
|
||||
entry->rank = (search - received_wisdom) + (entry - starch_count_above_u16_aligned_registry);
|
||||
}
|
||||
}
|
||||
|
||||
/* re-sort based on the new ranking */
|
||||
qsort(starch_count_above_u16_aligned_registry, entry - starch_count_above_u16_aligned_registry, sizeof(starch_count_above_u16_aligned_regentry), starch_regentry_rank_compare);
|
||||
|
||||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_count_above_u16_aligned = starch_count_above_u16_aligned_dispatch;
|
||||
}
|
||||
|
||||
starch_count_above_u16_aligned_regentry starch_count_above_u16_aligned_registry[] = {
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "generic_armv8_neon_simd_aligned", "armv8_neon_simd", starch_count_above_u16_aligned_generic_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "neon_armv8_neon_simd_aligned", "armv8_neon_simd", starch_count_above_u16_aligned_neon_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "generic_armv8_neon_simd", "armv8_neon_simd", starch_count_above_u16_generic_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "neon_armv8_neon_simd", "armv8_neon_simd", starch_count_above_u16_neon_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_ARM
|
||||
{ 0, "neon_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_count_above_u16_neon_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 1, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
|
||||
{ 2, "generic_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_count_above_u16_aligned_generic_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 3, "neon_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_count_above_u16_aligned_neon_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
{ 4, "generic_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_count_above_u16_generic_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_GENERIC
|
||||
{ 0, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
|
||||
#endif /* STARCH_MIX_GENERIC */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "generic_x86_avx2_aligned", "x86_avx2", starch_count_above_u16_aligned_generic_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
|
||||
{ 2, "generic_x86_avx2", "x86_avx2", starch_count_above_u16_generic_x86_avx2, cpu_supports_avx2 },
|
||||
#endif /* STARCH_MIX_X86 */
|
||||
{ 0, NULL, NULL, NULL, NULL }
|
||||
};
|
||||
|
||||
/* dispatcher / registry for magnitude_power_uc8 */
|
||||
|
||||
starch_magnitude_power_uc8_regentry * starch_magnitude_power_uc8_select() {
|
||||
|
|
@ -992,6 +1151,14 @@ int starch_read_wisdom (const char * path)
|
|||
return -1;
|
||||
|
||||
/* reset all ranks to identify entries not listed in the wisdom file; we'll assign ranks at the end to produce a stable sort */
|
||||
int rank_count_above_u16 = 0;
|
||||
for (starch_count_above_u16_regentry *entry = starch_count_above_u16_registry; entry->name; ++entry) {
|
||||
entry->rank = 0;
|
||||
}
|
||||
int rank_count_above_u16_aligned = 0;
|
||||
for (starch_count_above_u16_aligned_regentry *entry = starch_count_above_u16_aligned_registry; entry->name; ++entry) {
|
||||
entry->rank = 0;
|
||||
}
|
||||
int rank_magnitude_power_uc8 = 0;
|
||||
for (starch_magnitude_power_uc8_regentry *entry = starch_magnitude_power_uc8_registry; entry->name; ++entry) {
|
||||
entry->rank = 0;
|
||||
|
|
@ -1065,6 +1232,24 @@ int starch_read_wisdom (const char * path)
|
|||
*end = 0;
|
||||
|
||||
/* try to find a matching registry entry */
|
||||
if (!strcmp(name, "count_above_u16")) {
|
||||
for (starch_count_above_u16_regentry *entry = starch_count_above_u16_registry; entry->name; ++entry) {
|
||||
if (!strcmp(impl, entry->name)) {
|
||||
entry->rank = ++rank_count_above_u16;
|
||||
break;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(name, "count_above_u16_aligned")) {
|
||||
for (starch_count_above_u16_aligned_regentry *entry = starch_count_above_u16_aligned_registry; entry->name; ++entry) {
|
||||
if (!strcmp(impl, entry->name)) {
|
||||
entry->rank = ++rank_count_above_u16_aligned;
|
||||
break;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!strcmp(name, "magnitude_power_uc8")) {
|
||||
for (starch_magnitude_power_uc8_regentry *entry = starch_magnitude_power_uc8_registry; entry->name; ++entry) {
|
||||
if (!strcmp(impl, entry->name)) {
|
||||
|
|
@ -1165,6 +1350,28 @@ int starch_read_wisdom (const char * path)
|
|||
fclose(fp);
|
||||
|
||||
/* assign ranks to unmatched items to (stable) sort them last; re-sort everything */
|
||||
{
|
||||
starch_count_above_u16_regentry *entry;
|
||||
for (entry = starch_count_above_u16_registry; entry->name; ++entry) {
|
||||
if (!entry->rank)
|
||||
entry->rank = ++rank_count_above_u16;
|
||||
}
|
||||
qsort(starch_count_above_u16_registry, entry - starch_count_above_u16_registry, sizeof(starch_count_above_u16_regentry), starch_regentry_rank_compare);
|
||||
|
||||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_count_above_u16 = starch_count_above_u16_dispatch;
|
||||
}
|
||||
{
|
||||
starch_count_above_u16_aligned_regentry *entry;
|
||||
for (entry = starch_count_above_u16_aligned_registry; entry->name; ++entry) {
|
||||
if (!entry->rank)
|
||||
entry->rank = ++rank_count_above_u16_aligned;
|
||||
}
|
||||
qsort(starch_count_above_u16_aligned_registry, entry - starch_count_above_u16_aligned_registry, sizeof(starch_count_above_u16_aligned_regentry), starch_regentry_rank_compare);
|
||||
|
||||
/* reset the implementation pointer so the next call will re-select */
|
||||
starch_count_above_u16_aligned = starch_count_above_u16_aligned_dispatch;
|
||||
}
|
||||
{
|
||||
starch_magnitude_power_uc8_regentry *entry;
|
||||
for (entry = starch_magnitude_power_uc8_registry; entry->name; ++entry) {
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv7a_neon_vfpv4
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/count_above_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
|
|
@ -33,6 +34,7 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv7a_neon_vfpv4
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/count_above_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv8_neon_simd
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/count_above_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
|
|
@ -33,6 +34,7 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv8_neon_simd
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/count_above_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## generic
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/count_above_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## x86_avx2
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/count_above_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
|
|
@ -32,6 +33,7 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## x86_avx2
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/count_above_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
|
|
|
|||
|
|
@ -21,19 +21,19 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_AARCH64
|
||||
|
||||
|
||||
dsp/generated/flavor.armv8_neon_simd.o: dsp/generated/flavor.armv8_neon_simd.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.armv8_neon_simd.o: dsp/generated/flavor.armv8_neon_simd.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) -march=armv8-a+simd -ffast-math dsp/generated/flavor.armv8_neon_simd.c -o dsp/generated/flavor.armv8_neon_simd.o
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.armv8_neon_simd.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/count_above_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -21,19 +21,19 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_ARM
|
||||
|
||||
|
||||
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) -march=armv7-a+neon-vfpv4 -mfpu=neon-vfpv4 -ffast-math dsp/generated/flavor.armv7a_neon_vfpv4.c -o dsp/generated/flavor.armv7a_neon_vfpv4.o
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.armv7a_neon_vfpv4.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/count_above_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -21,16 +21,16 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_GENERIC
|
||||
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/count_above_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -21,19 +21,19 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_X86
|
||||
|
||||
|
||||
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) -mavx2 -ffast-math dsp/generated/flavor.x86_avx2.c -o dsp/generated/flavor.x86_avx2.o
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.x86_avx2.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/count_above_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -195,6 +195,36 @@ extern starch_mean_power_u16_aligned_regentry starch_mean_power_u16_aligned_regi
|
|||
starch_mean_power_u16_aligned_regentry * starch_mean_power_u16_aligned_select();
|
||||
void starch_mean_power_u16_aligned_set_wisdom( const char * const * received_wisdom );
|
||||
|
||||
typedef void (* starch_count_above_u16_ptr) ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
extern starch_count_above_u16_ptr starch_count_above_u16;
|
||||
|
||||
typedef struct {
|
||||
int rank;
|
||||
const char *name;
|
||||
const char *flavor;
|
||||
starch_count_above_u16_ptr callable;
|
||||
int (*flavor_supported)();
|
||||
} starch_count_above_u16_regentry;
|
||||
|
||||
extern starch_count_above_u16_regentry starch_count_above_u16_registry[];
|
||||
starch_count_above_u16_regentry * starch_count_above_u16_select();
|
||||
void starch_count_above_u16_set_wisdom( const char * const * received_wisdom );
|
||||
|
||||
typedef void (* starch_count_above_u16_aligned_ptr) ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
extern starch_count_above_u16_aligned_ptr starch_count_above_u16_aligned;
|
||||
|
||||
typedef struct {
|
||||
int rank;
|
||||
const char *name;
|
||||
const char *flavor;
|
||||
starch_count_above_u16_aligned_ptr callable;
|
||||
int (*flavor_supported)();
|
||||
} starch_count_above_u16_aligned_regentry;
|
||||
|
||||
extern starch_count_above_u16_aligned_regentry starch_count_above_u16_aligned_registry[];
|
||||
starch_count_above_u16_aligned_regentry * starch_count_above_u16_aligned_select();
|
||||
void starch_count_above_u16_aligned_set_wisdom( const char * const * received_wisdom );
|
||||
|
||||
/* flavors and prototypes */
|
||||
|
||||
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
|
||||
|
|
@ -233,6 +263,10 @@ void starch_magnitude_sc16q11_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg
|
|||
void starch_magnitude_sc16q11_aligned_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_count_above_u16_generic_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
void starch_count_above_u16_aligned_generic_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
void starch_count_above_u16_neon_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
void starch_count_above_u16_aligned_neon_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
|
@ -279,6 +313,10 @@ void starch_magnitude_sc16q11_12bit_table_armv8_neon_simd ( const sc16_t * arg0,
|
|||
void starch_magnitude_sc16q11_aligned_12bit_table_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_count_above_u16_generic_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
void starch_count_above_u16_aligned_generic_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
void starch_count_above_u16_neon_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
void starch_count_above_u16_aligned_neon_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
void starch_magnitude_sc16_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
|
@ -303,6 +341,7 @@ void starch_magnitude_sc16q11_exact_u32_generic ( const sc16_t * arg0, uint16_t
|
|||
void starch_magnitude_sc16q11_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_11bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_12bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_count_above_u16_generic_generic ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
#endif /* STARCH_FLAVOR_GENERIC */
|
||||
|
|
@ -337,6 +376,8 @@ void starch_magnitude_sc16q11_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16
|
|||
void starch_magnitude_sc16q11_aligned_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_count_above_u16_generic_x86_avx2 ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
void starch_count_above_u16_aligned_generic_x86_avx2 ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
|
||||
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
|
|
|||
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Count the number of samples in a uint16_t buffer that are >= a threshold.
|
||||
*/
|
||||
void STARCH_IMPL(count_above_u16, generic) (const uint16_t *in, unsigned len, uint16_t threshold, unsigned *out_count)
|
||||
{
|
||||
const uint16_t * restrict in_align = STARCH_ALIGNED(in);
|
||||
|
||||
unsigned count = 0;
|
||||
while (len--) {
|
||||
if (in_align[0] >= threshold)
|
||||
++count;
|
||||
++in_align;
|
||||
}
|
||||
|
||||
*out_count = count;
|
||||
}
|
||||
|
||||
#ifdef STARCH_FEATURE_NEON
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void STARCH_IMPL_REQUIRES(count_above_u16, neon, STARCH_FEATURE_NEON) (const uint16_t *in, unsigned len, uint16_t threshold, unsigned *out_count)
|
||||
{
|
||||
const uint16_t * restrict in_align = STARCH_ALIGNED(in);
|
||||
const uint16x8_t threshold_x8 = vdupq_n_u16(threshold);
|
||||
|
||||
int32x4_t accumulator0 = vdupq_n_s32(0);
|
||||
int32x4_t accumulator1 = vdupq_n_s32(0);
|
||||
|
||||
unsigned len8 = len >> 3;
|
||||
while (len8--) {
|
||||
uint16x8_t mag = vld1q_u16(in_align);
|
||||
int16x8_t compare = vreinterpretq_s16_u16(vcgeq_u16(mag, threshold_x8));
|
||||
accumulator0 = vsubw_s16(accumulator0, vget_low_s16(compare));
|
||||
accumulator1 = vsubw_s16(accumulator1, vget_high_s16(compare));
|
||||
|
||||
in_align += 8;
|
||||
}
|
||||
|
||||
// sum accumulators across all lanes
|
||||
int32x4_t sum2 = vaddq_s32(accumulator0, accumulator1);
|
||||
int32x2_t sum4 = vadd_s32(vget_low_s32(sum2), vget_high_s32(sum2));
|
||||
int32x2_t sum8 = vpadd_s32(sum4, sum4);
|
||||
int32x4_t sum8_x2 = vcombine_s32(sum8, sum8);
|
||||
|
||||
unsigned len1 = len & 7;
|
||||
while (len1--) {
|
||||
uint16x4_t mag = vld1_dup_u16(in_align);
|
||||
int16x4_t compare = vreinterpret_s16_u16(vcge_u16(mag, vget_low_u16(threshold_x8)));
|
||||
sum8_x2 = vsubw_s16(sum8_x2, compare);
|
||||
|
||||
in_align += 1;
|
||||
}
|
||||
|
||||
*out_count = vgetq_lane_s32(sum8_x2, 0);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -20,6 +20,7 @@ gen.add_function(name = 'magnitude_power_uc8', argtypes = ['const uc8_t *', 'uin
|
|||
gen.add_function(name = 'magnitude_sc16', argtypes = ['const sc16_t *', 'uint16_t *', 'unsigned'], aligned = True)
|
||||
gen.add_function(name = 'magnitude_sc16q11', argtypes = ['const sc16_t *', 'uint16_t *', 'unsigned'], aligned = True)
|
||||
gen.add_function(name = 'mean_power_u16', argtypes = ['const uint16_t *', 'unsigned', 'double *', 'double *'], aligned = True)
|
||||
gen.add_function(name = 'count_above_u16', argtypes = ['const uint16_t *', 'unsigned', 'uint16_t', 'unsigned *'], aligned = True)
|
||||
|
||||
gen.add_feature(name='neon', description='ARM NEON')
|
||||
|
||||
|
|
|
|||
|
|
@ -305,6 +305,7 @@ static void showDSP()
|
|||
SHOW(magnitude_sc16);
|
||||
SHOW(magnitude_sc16q11);
|
||||
SHOW(mean_power_u16);
|
||||
SHOW(count_above_u16);
|
||||
|
||||
#undef SHOW
|
||||
|
||||
|
|
|
|||
|
|
@ -29,3 +29,9 @@ mean_power_u16 u64_generic
|
|||
|
||||
mean_power_u16_aligned u32_armv7a_neon_vfpv4_aligned # 44929 ns/call
|
||||
mean_power_u16_aligned u64_generic # 934445 ns/call
|
||||
|
||||
count_above_u16 neon_armv7a_neon_vfpv4 # 35 ns/call
|
||||
count_above_u16 generic_generic # 178 ns/call
|
||||
|
||||
count_above_u16_aligned neon_armv7a_neon_vfpv4 # 34 ns/call
|
||||
count_above_u16_aligned generic_generic # 179 ns/call
|
||||
|
|
|
|||
|
|
@ -14,3 +14,6 @@ magnitude_uc8_aligned lookup_unroll_4_generic
|
|||
|
||||
mean_power_u16 u32_generic
|
||||
mean_power_u16_aligned u32_generic
|
||||
|
||||
count_above_u16 generic_generic
|
||||
count_above_u16_aligned generic_generic
|
||||
|
|
|
|||
|
|
@ -29,3 +29,9 @@ mean_power_u16 u32_generic
|
|||
|
||||
mean_power_u16_aligned u32_x86_avx2_aligned # 11572 ns/call
|
||||
mean_power_u16_aligned u32_generic # 18207 ns/call
|
||||
|
||||
count_above_u16 generic_x86_avx2 # 20 ns/call
|
||||
count_above_u16 generic_generic # 30 ns/call
|
||||
|
||||
count_above_u16_aligned generic_x86_avx2_aligned # 15 ns/call
|
||||
count_above_u16_aligned generic_generic # 31 ns/call
|
||||
|
|
|
|||
|
|
@ -88,3 +88,10 @@ mean_power_u16_aligned u64_x86_avx2_aligned
|
|||
mean_power_u16_aligned u64_x86_avx2 # 31283 ns/call
|
||||
mean_power_u16_aligned u64_generic # 39639 ns/call
|
||||
mean_power_u16_aligned float_generic # 105615 ns/call
|
||||
|
||||
count_above_u16 generic_x86_avx2 # 20 ns/call
|
||||
count_above_u16 generic_generic # 30 ns/call
|
||||
|
||||
count_above_u16_aligned generic_x86_avx2_aligned # 15 ns/call
|
||||
count_above_u16_aligned generic_x86_avx2 # 19 ns/call
|
||||
count_above_u16_aligned generic_generic # 31 ns/call
|
||||
|
|
|
|||
|
|
@ -105,3 +105,13 @@ mean_power_u16_aligned float_armv7a_neon_vfpv4
|
|||
mean_power_u16_aligned u64_generic # 131637 ns/call
|
||||
mean_power_u16_aligned u32_generic # 132092 ns/call
|
||||
mean_power_u16_aligned float_generic # 187127 ns/call
|
||||
|
||||
count_above_u16 neon_armv7a_neon_vfpv4 # 35 ns/call
|
||||
count_above_u16 generic_armv7a_neon_vfpv4 # 56 ns/call
|
||||
count_above_u16 generic_generic # 178 ns/call
|
||||
|
||||
count_above_u16_aligned neon_armv7a_neon_vfpv4_aligned # 34 ns/call
|
||||
count_above_u16_aligned neon_armv7a_neon_vfpv4 # 34 ns/call
|
||||
count_above_u16_aligned generic_armv7a_neon_vfpv4_aligned # 53 ns/call
|
||||
count_above_u16_aligned generic_armv7a_neon_vfpv4 # 53 ns/call
|
||||
count_above_u16_aligned generic_generic # 179 ns/call
|
||||
|
|
|
|||
Loading…
Reference in New Issue