Use a starch implementation for the burst-detection sample counting loop.

This commit is contained in:
Oliver Jowett 2021-07-08 18:53:02 +08:00
parent 1cb4284e6c
commit 2480e5169c
21 changed files with 653 additions and 21 deletions

View File

@ -292,12 +292,8 @@ static void adaptive_burst_scan_windows(uint16_t *buf, unsigned windows)
// return the number of loud samples seen
static inline unsigned adaptive_burst_count_samples(uint16_t *buf, unsigned n)
{
unsigned counter = 0;
while (n--) {
if (buf[0] > 46395) // -3dBFS
++counter;
++buf;
}
unsigned counter;
starch_count_above_u16(buf, n, 46395 /* -3dBFS */, &counter);
return counter;
}

View File

@ -0,0 +1,39 @@
#include <stdlib.h>
void STARCH_BENCHMARK(count_above_u16) (void)
{
uint16_t *in = NULL;
const unsigned len = 96; /* Typical use is with short burst windows (40us) */
const unsigned threshold = 46395; /* -3dBFS */
if (!(in = STARCH_BENCHMARK_ALLOC(len, uint16_t))) {
goto done;
}
srand(1);
for (unsigned i = 0; i < len; ++i) {
in[i] = rand() % 65536;
}
unsigned count;
STARCH_BENCHMARK_RUN( count_above_u16, in, len, threshold, &count );
done:
STARCH_BENCHMARK_FREE(in);
}
bool STARCH_BENCHMARK_VERIFY(count_above_u16) (const uint16_t *in, unsigned len, uint16_t threshold, unsigned *out_count)
{
unsigned expected = 0;
for (unsigned i = 0; i < len; ++i) {
if (in[i] >= threshold)
++expected;
}
if (expected != *out_count) {
fprintf(stderr, "verification failed: expected count %u, got count %u\n", expected, *out_count);
return false;
}
return true;
}

View File

@ -112,6 +112,230 @@ static bool starch_benchmark_flavor_in_list(const char *flavor, const starch_ben
}
/* prototypes for benchmark helpers provided by user code */
void starch_count_above_u16_benchmark (void);
bool starch_count_above_u16_benchmark_verify ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
void starch_count_above_u16_benchmark(void);
static void starch_benchmark_one_count_above_u16( starch_count_above_u16_regentry * _entry, const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 )
{
fprintf(stderr, " %-40s ", _entry->name);
/* test for support */
if (_entry->flavor_supported && !(_entry->flavor_supported())) {
fprintf(stderr, "unsupported\n");
return;
}
if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
fprintf(stderr, "skipped (not whitelisted)\n");
return;
}
if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
fprintf(stderr, "skipped (blacklisted)\n");
return;
}
if (starch_benchmark_list_only) {
fprintf(stderr, "supported\n");
return;
}
/* initial warmup */
for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
_entry->callable ( arg0, arg1, arg2, arg3 );
/* verify correctness of the output */
if (! starch_count_above_u16_benchmark_verify ( arg0, arg1, arg2, arg3 )) {
fprintf(stderr, "skipped (verification failed)\n");
starch_benchmark_validation_failed = true;
return;
}
if (starch_benchmark_validate_only) {
fprintf(stderr, "validation ok\n");
return;
}
/* pre-benchmark, find a loop count that takes at least 100ms */
starch_benchmark_time _start, _end;
uint64_t _elapsed = 0;
uint64_t _loops = 127;
while (_elapsed < 100000000) {
_loops *= 2;
starch_benchmark_get_time(&_start);
for (uint64_t _loop = 0; _loop < _loops; ++_loop)
_entry->callable ( arg0, arg1, arg2, arg3 );
starch_benchmark_get_time(&_end);
_elapsed = starch_benchmark_elapsed(&_start, &_end);
}
/* real benchmark, run for approx 1 second */
_loops = _loops * 1000000000 / _elapsed;
_elapsed = 0;
uint64_t _elapsed_min = UINT64_MAX;
uint64_t _elapsed_max = 0;
for (unsigned _iter = 0; _iter < starch_benchmark_iterations; ++_iter) {
starch_benchmark_get_time(&_start);
for (uint64_t _loop = 0; _loop < _loops; ++_loop)
_entry->callable ( arg0, arg1, arg2, arg3 );
starch_benchmark_get_time(&_end);
uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
if (_elapsed_one < _elapsed_min)
_elapsed_min = _elapsed_one;
if (_elapsed_one > _elapsed_max)
_elapsed_max = _elapsed_one;
_elapsed += _elapsed_one;
}
uint64_t _per_loop;
if (starch_benchmark_iterations > 2)
_per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (starch_benchmark_iterations - 2);
else
_per_loop = _elapsed / _loops / starch_benchmark_iterations;
fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);
if (starch_benchmark_result_count >= starch_benchmark_result_size) {
if (!starch_benchmark_result_size)
starch_benchmark_result_size = 64;
else
starch_benchmark_result_size *= 2;
starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
if (!starch_benchmark_results) {
fprintf(stderr, "realloc: %s\n", strerror(errno));
exit(1);
}
}
starch_benchmark_results[starch_benchmark_result_count].name = "count_above_u16";
starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
++starch_benchmark_result_count;
}
static void starch_benchmark_run_count_above_u16( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 )
{
for (starch_count_above_u16_regentry *_entry = starch_count_above_u16_registry; _entry->name; ++_entry) {
starch_benchmark_one_count_above_u16( _entry, arg0, arg1, arg2, arg3 );
}
}
/* prototypes for benchmark helpers provided by user code */
void starch_count_above_u16_aligned_benchmark (void);
bool starch_count_above_u16_aligned_benchmark_verify ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
/* prototype the benchmarking function so that we can build with -Wmissing-declarations */
void starch_count_above_u16_aligned_benchmark(void);
static void starch_benchmark_one_count_above_u16_aligned( starch_count_above_u16_aligned_regentry * _entry, const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 )
{
fprintf(stderr, " %-40s ", _entry->name);
/* test for support */
if (_entry->flavor_supported && !(_entry->flavor_supported())) {
fprintf(stderr, "unsupported\n");
return;
}
if (starch_benchmark_flavor_whitelist && !starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_whitelist)) {
fprintf(stderr, "skipped (not whitelisted)\n");
return;
}
if (starch_benchmark_flavor_blacklist && starch_benchmark_flavor_in_list(_entry->flavor, starch_benchmark_flavor_blacklist)) {
fprintf(stderr, "skipped (blacklisted)\n");
return;
}
if (starch_benchmark_list_only) {
fprintf(stderr, "supported\n");
return;
}
/* initial warmup */
for (unsigned _loop = 0; _loop < starch_benchmark_warmup_loops; ++_loop)
_entry->callable ( arg0, arg1, arg2, arg3 );
/* verify correctness of the output */
if (! starch_count_above_u16_aligned_benchmark_verify ( arg0, arg1, arg2, arg3 )) {
fprintf(stderr, "skipped (verification failed)\n");
starch_benchmark_validation_failed = true;
return;
}
if (starch_benchmark_validate_only) {
fprintf(stderr, "validation ok\n");
return;
}
/* pre-benchmark, find a loop count that takes at least 100ms */
starch_benchmark_time _start, _end;
uint64_t _elapsed = 0;
uint64_t _loops = 127;
while (_elapsed < 100000000) {
_loops *= 2;
starch_benchmark_get_time(&_start);
for (uint64_t _loop = 0; _loop < _loops; ++_loop)
_entry->callable ( arg0, arg1, arg2, arg3 );
starch_benchmark_get_time(&_end);
_elapsed = starch_benchmark_elapsed(&_start, &_end);
}
/* real benchmark, run for approx 1 second */
_loops = _loops * 1000000000 / _elapsed;
_elapsed = 0;
uint64_t _elapsed_min = UINT64_MAX;
uint64_t _elapsed_max = 0;
for (unsigned _iter = 0; _iter < starch_benchmark_iterations; ++_iter) {
starch_benchmark_get_time(&_start);
for (uint64_t _loop = 0; _loop < _loops; ++_loop)
_entry->callable ( arg0, arg1, arg2, arg3 );
starch_benchmark_get_time(&_end);
uint64_t _elapsed_one = starch_benchmark_elapsed(&_start, &_end);
if (_elapsed_one < _elapsed_min)
_elapsed_min = _elapsed_one;
if (_elapsed_one > _elapsed_max)
_elapsed_max = _elapsed_one;
_elapsed += _elapsed_one;
}
uint64_t _per_loop;
if (starch_benchmark_iterations > 2)
_per_loop = (_elapsed - _elapsed_min - _elapsed_max) / _loops / (starch_benchmark_iterations - 2);
else
_per_loop = _elapsed / _loops / starch_benchmark_iterations;
fprintf(stderr, "%" PRIu64 " ns/call\n", _per_loop);
if (starch_benchmark_result_count >= starch_benchmark_result_size) {
if (!starch_benchmark_result_size)
starch_benchmark_result_size = 64;
else
starch_benchmark_result_size *= 2;
starch_benchmark_results = realloc(starch_benchmark_results, starch_benchmark_result_size * sizeof(*starch_benchmark_results));
if (!starch_benchmark_results) {
fprintf(stderr, "realloc: %s\n", strerror(errno));
exit(1);
}
}
starch_benchmark_results[starch_benchmark_result_count].name = "count_above_u16_aligned";
starch_benchmark_results[starch_benchmark_result_count].impl = _entry->name;
starch_benchmark_results[starch_benchmark_result_count].ns = _per_loop;
++starch_benchmark_result_count;
}
static void starch_benchmark_run_count_above_u16_aligned( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 )
{
for (starch_count_above_u16_aligned_regentry *_entry = starch_count_above_u16_aligned_registry; _entry->name; ++_entry) {
starch_benchmark_one_count_above_u16_aligned( _entry, arg0, arg1, arg2, arg3 );
}
}
/* prototypes for benchmark helpers provided by user code */
void starch_magnitude_power_uc8_benchmark (void);
bool starch_magnitude_power_uc8_benchmark_verify ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
@ -1246,6 +1470,7 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
#define STARCH_BENCHMARK_ALLOC(_count, _type) ((_type *) starch_benchmark_aligned_alloc(1, alignof(_type), (_count) * sizeof(_type)))
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)
#include "../benchmark/count_above_u16_benchmark.c"
#include "../benchmark/magnitude_power_uc8_benchmark.c"
#include "../benchmark/magnitude_sc16_benchmark.c"
#include "../benchmark/magnitude_sc16q11_benchmark.c"
@ -1274,12 +1499,23 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
#define STARCH_BENCHMARK_ALLOC(_count, _type) ((_type *) starch_benchmark_aligned_alloc(STARCH_MIX_ALIGNMENT, alignof(_type), (_count) * sizeof(_type)))
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)
#include "../benchmark/count_above_u16_benchmark.c"
#include "../benchmark/magnitude_power_uc8_benchmark.c"
#include "../benchmark/magnitude_sc16_benchmark.c"
#include "../benchmark/magnitude_sc16q11_benchmark.c"
#include "../benchmark/magnitude_uc8_benchmark.c"
#include "../benchmark/mean_power_u16_benchmark.c"
static void starch_benchmark_all_count_above_u16(void)
{
fprintf(stderr, "==== count_above_u16 ===\n");
starch_count_above_u16_benchmark ();
}
static void starch_benchmark_all_count_above_u16_aligned(void)
{
fprintf(stderr, "==== count_above_u16_aligned ===\n");
starch_count_above_u16_aligned_benchmark ();
}
static void starch_benchmark_all_magnitude_power_uc8(void)
{
fprintf(stderr, "==== magnitude_power_uc8 ===\n");
@ -1383,6 +1619,8 @@ static void starch_benchmark_usage(const char *argv0)
#endif
"\n"
"Supported functions: "
"count_above_u16 "
"count_above_u16_aligned "
"magnitude_power_uc8 "
"magnitude_power_uc8_aligned "
"magnitude_sc16 "
@ -1478,6 +1716,16 @@ int main(int argc, char **argv)
}
for (int i = optind; i < argc; ++i) {
if (!strcmp(argv[i], "count_above_u16")) {
specific = 1;
starch_benchmark_all_count_above_u16();
continue;
}
if (!strcmp(argv[i], "count_above_u16_aligned")) {
specific = 1;
starch_benchmark_all_count_above_u16_aligned();
continue;
}
if (!strcmp(argv[i], "magnitude_power_uc8")) {
specific = 1;
starch_benchmark_all_magnitude_power_uc8();
@ -1534,6 +1782,8 @@ int main(int argc, char **argv)
}
if (!specific) {
starch_benchmark_all_count_above_u16();
starch_benchmark_all_count_above_u16_aligned();
starch_benchmark_all_magnitude_power_uc8();
starch_benchmark_all_magnitude_power_uc8_aligned();
starch_benchmark_all_magnitude_sc16();

View File

@ -19,6 +19,165 @@ static int starch_regentry_rank_compare (const void *l, const void *r)
return left->rank - right->rank;
}
/* dispatcher / registry for count_above_u16 */
starch_count_above_u16_regentry * starch_count_above_u16_select() {
for (starch_count_above_u16_regentry *entry = starch_count_above_u16_registry;
entry->name;
++entry)
{
if (entry->flavor_supported && !(entry->flavor_supported()))
continue;
return entry;
}
return NULL;
}
static void starch_count_above_u16_dispatch ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 ) {
starch_count_above_u16_regentry *entry = starch_count_above_u16_select();
if (!entry)
abort();
starch_count_above_u16 = entry->callable;
starch_count_above_u16 ( arg0, arg1, arg2, arg3 );
}
starch_count_above_u16_ptr starch_count_above_u16 = starch_count_above_u16_dispatch;
void starch_count_above_u16_set_wisdom (const char * const * received_wisdom)
{
/* re-rank the registry based on received wisdom */
starch_count_above_u16_regentry *entry;
for (entry = starch_count_above_u16_registry; entry->name; ++entry) {
const char * const *search;
for (search = received_wisdom; *search; ++search) {
if (!strcmp(*search, entry->name)) {
break;
}
}
if (*search) {
/* matches an entry in the wisdom list, order by position in the list */
entry->rank = search - received_wisdom;
} else {
/* no match, rank after all possible matches, retaining existing order */
entry->rank = (search - received_wisdom) + (entry - starch_count_above_u16_registry);
}
}
/* re-sort based on the new ranking */
qsort(starch_count_above_u16_registry, entry - starch_count_above_u16_registry, sizeof(starch_count_above_u16_regentry), starch_regentry_rank_compare);
/* reset the implementation pointer so the next call will re-select */
starch_count_above_u16 = starch_count_above_u16_dispatch;
}
starch_count_above_u16_regentry starch_count_above_u16_registry[] = {
#ifdef STARCH_MIX_AARCH64
{ 0, "generic_armv8_neon_simd", "armv8_neon_simd", starch_count_above_u16_generic_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "neon_armv8_neon_simd", "armv8_neon_simd", starch_count_above_u16_neon_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_ARM
{ 0, "neon_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_count_above_u16_neon_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 1, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
{ 2, "generic_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_count_above_u16_generic_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_GENERIC
{ 0, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_X86
{ 0, "generic_x86_avx2", "x86_avx2", starch_count_above_u16_generic_x86_avx2, cpu_supports_avx2 },
{ 1, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
#endif /* STARCH_MIX_X86 */
{ 0, NULL, NULL, NULL, NULL }
};
/* dispatcher / registry for count_above_u16_aligned */
starch_count_above_u16_aligned_regentry * starch_count_above_u16_aligned_select() {
for (starch_count_above_u16_aligned_regentry *entry = starch_count_above_u16_aligned_registry;
entry->name;
++entry)
{
if (entry->flavor_supported && !(entry->flavor_supported()))
continue;
return entry;
}
return NULL;
}
static void starch_count_above_u16_aligned_dispatch ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 ) {
starch_count_above_u16_aligned_regentry *entry = starch_count_above_u16_aligned_select();
if (!entry)
abort();
starch_count_above_u16_aligned = entry->callable;
starch_count_above_u16_aligned ( arg0, arg1, arg2, arg3 );
}
starch_count_above_u16_aligned_ptr starch_count_above_u16_aligned = starch_count_above_u16_aligned_dispatch;
void starch_count_above_u16_aligned_set_wisdom (const char * const * received_wisdom)
{
/* re-rank the registry based on received wisdom */
starch_count_above_u16_aligned_regentry *entry;
for (entry = starch_count_above_u16_aligned_registry; entry->name; ++entry) {
const char * const *search;
for (search = received_wisdom; *search; ++search) {
if (!strcmp(*search, entry->name)) {
break;
}
}
if (*search) {
/* matches an entry in the wisdom list, order by position in the list */
entry->rank = search - received_wisdom;
} else {
/* no match, rank after all possible matches, retaining existing order */
entry->rank = (search - received_wisdom) + (entry - starch_count_above_u16_aligned_registry);
}
}
/* re-sort based on the new ranking */
qsort(starch_count_above_u16_aligned_registry, entry - starch_count_above_u16_aligned_registry, sizeof(starch_count_above_u16_aligned_regentry), starch_regentry_rank_compare);
/* reset the implementation pointer so the next call will re-select */
starch_count_above_u16_aligned = starch_count_above_u16_aligned_dispatch;
}
starch_count_above_u16_aligned_regentry starch_count_above_u16_aligned_registry[] = {
#ifdef STARCH_MIX_AARCH64
{ 0, "generic_armv8_neon_simd_aligned", "armv8_neon_simd", starch_count_above_u16_aligned_generic_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "neon_armv8_neon_simd_aligned", "armv8_neon_simd", starch_count_above_u16_aligned_neon_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "generic_armv8_neon_simd", "armv8_neon_simd", starch_count_above_u16_generic_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "neon_armv8_neon_simd", "armv8_neon_simd", starch_count_above_u16_neon_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_ARM
{ 0, "neon_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_count_above_u16_neon_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 1, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
{ 2, "generic_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_count_above_u16_aligned_generic_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 3, "neon_armv7a_neon_vfpv4_aligned", "armv7a_neon_vfpv4", starch_count_above_u16_aligned_neon_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
{ 4, "generic_armv7a_neon_vfpv4", "armv7a_neon_vfpv4", starch_count_above_u16_generic_armv7a_neon_vfpv4, cpu_supports_armv7_neon_vfpv4 },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_GENERIC
{ 0, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
#endif /* STARCH_MIX_GENERIC */
#ifdef STARCH_MIX_X86
{ 0, "generic_x86_avx2_aligned", "x86_avx2", starch_count_above_u16_aligned_generic_x86_avx2, cpu_supports_avx2 },
{ 1, "generic_generic", "generic", starch_count_above_u16_generic_generic, NULL },
{ 2, "generic_x86_avx2", "x86_avx2", starch_count_above_u16_generic_x86_avx2, cpu_supports_avx2 },
#endif /* STARCH_MIX_X86 */
{ 0, NULL, NULL, NULL, NULL }
};
/* dispatcher / registry for magnitude_power_uc8 */
starch_magnitude_power_uc8_regentry * starch_magnitude_power_uc8_select() {
@ -992,6 +1151,14 @@ int starch_read_wisdom (const char * path)
return -1;
/* reset all ranks to identify entries not listed in the wisdom file; we'll assign ranks at the end to produce a stable sort */
int rank_count_above_u16 = 0;
for (starch_count_above_u16_regentry *entry = starch_count_above_u16_registry; entry->name; ++entry) {
entry->rank = 0;
}
int rank_count_above_u16_aligned = 0;
for (starch_count_above_u16_aligned_regentry *entry = starch_count_above_u16_aligned_registry; entry->name; ++entry) {
entry->rank = 0;
}
int rank_magnitude_power_uc8 = 0;
for (starch_magnitude_power_uc8_regentry *entry = starch_magnitude_power_uc8_registry; entry->name; ++entry) {
entry->rank = 0;
@ -1065,6 +1232,24 @@ int starch_read_wisdom (const char * path)
*end = 0;
/* try to find a matching registry entry */
if (!strcmp(name, "count_above_u16")) {
for (starch_count_above_u16_regentry *entry = starch_count_above_u16_registry; entry->name; ++entry) {
if (!strcmp(impl, entry->name)) {
entry->rank = ++rank_count_above_u16;
break;
}
}
continue;
}
if (!strcmp(name, "count_above_u16_aligned")) {
for (starch_count_above_u16_aligned_regentry *entry = starch_count_above_u16_aligned_registry; entry->name; ++entry) {
if (!strcmp(impl, entry->name)) {
entry->rank = ++rank_count_above_u16_aligned;
break;
}
}
continue;
}
if (!strcmp(name, "magnitude_power_uc8")) {
for (starch_magnitude_power_uc8_regentry *entry = starch_magnitude_power_uc8_registry; entry->name; ++entry) {
if (!strcmp(impl, entry->name)) {
@ -1165,6 +1350,28 @@ int starch_read_wisdom (const char * path)
fclose(fp);
/* assign ranks to unmatched items to (stable) sort them last; re-sort everything */
{
starch_count_above_u16_regentry *entry;
for (entry = starch_count_above_u16_registry; entry->name; ++entry) {
if (!entry->rank)
entry->rank = ++rank_count_above_u16;
}
qsort(starch_count_above_u16_registry, entry - starch_count_above_u16_registry, sizeof(starch_count_above_u16_regentry), starch_regentry_rank_compare);
/* reset the implementation pointer so the next call will re-select */
starch_count_above_u16 = starch_count_above_u16_dispatch;
}
{
starch_count_above_u16_aligned_regentry *entry;
for (entry = starch_count_above_u16_aligned_registry; entry->name; ++entry) {
if (!entry->rank)
entry->rank = ++rank_count_above_u16_aligned;
}
qsort(starch_count_above_u16_aligned_registry, entry - starch_count_above_u16_aligned_registry, sizeof(starch_count_above_u16_aligned_regentry), starch_regentry_rank_compare);
/* reset the implementation pointer so the next call will re-select */
starch_count_above_u16_aligned = starch_count_above_u16_aligned_dispatch;
}
{
starch_magnitude_power_uc8_regentry *entry;
for (entry = starch_magnitude_power_uc8_registry; entry->name; ++entry) {

View File

@ -14,6 +14,7 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv7a_neon_vfpv4
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/count_above_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
@ -33,6 +34,7 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv7a_neon_vfpv4
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/count_above_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"

View File

@ -14,6 +14,7 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv8_neon_simd
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/count_above_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
@ -33,6 +34,7 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv8_neon_simd
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/count_above_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"

View File

@ -13,6 +13,7 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## generic
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/count_above_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"

View File

@ -13,6 +13,7 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## x86_avx2
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/count_above_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
@ -32,6 +33,7 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## x86_avx2
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/count_above_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"

View File

@ -21,19 +21,19 @@
STARCH_CFLAGS := -DSTARCH_MIX_AARCH64
dsp/generated/flavor.armv8_neon_simd.o: dsp/generated/flavor.armv8_neon_simd.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.armv8_neon_simd.o: dsp/generated/flavor.armv8_neon_simd.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) -march=armv8-a+simd -ffast-math dsp/generated/flavor.armv8_neon_simd.c -o dsp/generated/flavor.armv8_neon_simd.o
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.armv8_neon_simd.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/count_above_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -21,19 +21,19 @@
STARCH_CFLAGS := -DSTARCH_MIX_ARM
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) -march=armv7-a+neon-vfpv4 -mfpu=neon-vfpv4 -ffast-math dsp/generated/flavor.armv7a_neon_vfpv4.c -o dsp/generated/flavor.armv7a_neon_vfpv4.o
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.armv7a_neon_vfpv4.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/count_above_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -21,16 +21,16 @@
STARCH_CFLAGS := -DSTARCH_MIX_GENERIC
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/count_above_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -21,19 +21,19 @@
STARCH_CFLAGS := -DSTARCH_MIX_X86
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) -mavx2 -ffast-math dsp/generated/flavor.x86_avx2.c -o dsp/generated/flavor.x86_avx2.o
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/count_above_u16.c dsp/impl/magnitude_sc16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.x86_avx2.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/count_above_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

41
dsp/generated/starch.h generated
View File

@ -195,6 +195,36 @@ extern starch_mean_power_u16_aligned_regentry starch_mean_power_u16_aligned_regi
starch_mean_power_u16_aligned_regentry * starch_mean_power_u16_aligned_select();
void starch_mean_power_u16_aligned_set_wisdom( const char * const * received_wisdom );
typedef void (* starch_count_above_u16_ptr) ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
extern starch_count_above_u16_ptr starch_count_above_u16;
typedef struct {
int rank;
const char *name;
const char *flavor;
starch_count_above_u16_ptr callable;
int (*flavor_supported)();
} starch_count_above_u16_regentry;
extern starch_count_above_u16_regentry starch_count_above_u16_registry[];
starch_count_above_u16_regentry * starch_count_above_u16_select();
void starch_count_above_u16_set_wisdom( const char * const * received_wisdom );
typedef void (* starch_count_above_u16_aligned_ptr) ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
extern starch_count_above_u16_aligned_ptr starch_count_above_u16_aligned;
typedef struct {
int rank;
const char *name;
const char *flavor;
starch_count_above_u16_aligned_ptr callable;
int (*flavor_supported)();
} starch_count_above_u16_aligned_regentry;
extern starch_count_above_u16_aligned_regentry starch_count_above_u16_aligned_registry[];
starch_count_above_u16_aligned_regentry * starch_count_above_u16_aligned_select();
void starch_count_above_u16_aligned_set_wisdom( const char * const * received_wisdom );
/* flavors and prototypes */
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
@ -233,6 +263,10 @@ void starch_magnitude_sc16q11_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg
void starch_magnitude_sc16q11_aligned_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_count_above_u16_generic_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
void starch_count_above_u16_aligned_generic_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
void starch_count_above_u16_neon_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
void starch_count_above_u16_aligned_neon_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
@ -279,6 +313,10 @@ void starch_magnitude_sc16q11_12bit_table_armv8_neon_simd ( const sc16_t * arg0,
void starch_magnitude_sc16q11_aligned_12bit_table_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_count_above_u16_generic_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
void starch_count_above_u16_aligned_generic_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
void starch_count_above_u16_neon_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
void starch_count_above_u16_aligned_neon_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
void starch_magnitude_sc16_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
@ -303,6 +341,7 @@ void starch_magnitude_sc16q11_exact_u32_generic ( const sc16_t * arg0, uint16_t
void starch_magnitude_sc16q11_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_11bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_12bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_count_above_u16_generic_generic ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
#endif /* STARCH_FLAVOR_GENERIC */
@ -337,6 +376,8 @@ void starch_magnitude_sc16q11_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16
void starch_magnitude_sc16q11_aligned_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_count_above_u16_generic_x86_avx2 ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
void starch_count_above_u16_aligned_generic_x86_avx2 ( const uint16_t * arg0, unsigned arg1, uint16_t arg2, unsigned * arg3 );
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );

View File

@ -0,0 +1,58 @@
/*
* Count the number of samples in a uint16_t buffer that are >= a threshold.
*/
void STARCH_IMPL(count_above_u16, generic) (const uint16_t *in, unsigned len, uint16_t threshold, unsigned *out_count)
{
const uint16_t * restrict in_align = STARCH_ALIGNED(in);
unsigned count = 0;
while (len--) {
if (in_align[0] >= threshold)
++count;
++in_align;
}
*out_count = count;
}
#ifdef STARCH_FEATURE_NEON
#include <arm_neon.h>
void STARCH_IMPL_REQUIRES(count_above_u16, neon, STARCH_FEATURE_NEON) (const uint16_t *in, unsigned len, uint16_t threshold, unsigned *out_count)
{
const uint16_t * restrict in_align = STARCH_ALIGNED(in);
const uint16x8_t threshold_x8 = vdupq_n_u16(threshold);
int32x4_t accumulator0 = vdupq_n_s32(0);
int32x4_t accumulator1 = vdupq_n_s32(0);
unsigned len8 = len >> 3;
while (len8--) {
uint16x8_t mag = vld1q_u16(in_align);
int16x8_t compare = vreinterpretq_s16_u16(vcgeq_u16(mag, threshold_x8));
accumulator0 = vsubw_s16(accumulator0, vget_low_s16(compare));
accumulator1 = vsubw_s16(accumulator1, vget_high_s16(compare));
in_align += 8;
}
// sum accumulators across all lanes
int32x4_t sum2 = vaddq_s32(accumulator0, accumulator1);
int32x2_t sum4 = vadd_s32(vget_low_s32(sum2), vget_high_s32(sum2));
int32x2_t sum8 = vpadd_s32(sum4, sum4);
int32x4_t sum8_x2 = vcombine_s32(sum8, sum8);
unsigned len1 = len & 7;
while (len1--) {
uint16x4_t mag = vld1_dup_u16(in_align);
int16x4_t compare = vreinterpret_s16_u16(vcge_u16(mag, vget_low_u16(threshold_x8)));
sum8_x2 = vsubw_s16(sum8_x2, compare);
in_align += 1;
}
*out_count = vgetq_lane_s32(sum8_x2, 0);
}
#endif

View File

@ -20,6 +20,7 @@ gen.add_function(name = 'magnitude_power_uc8', argtypes = ['const uc8_t *', 'uin
gen.add_function(name = 'magnitude_sc16', argtypes = ['const sc16_t *', 'uint16_t *', 'unsigned'], aligned = True)
gen.add_function(name = 'magnitude_sc16q11', argtypes = ['const sc16_t *', 'uint16_t *', 'unsigned'], aligned = True)
gen.add_function(name = 'mean_power_u16', argtypes = ['const uint16_t *', 'unsigned', 'double *', 'double *'], aligned = True)
gen.add_function(name = 'count_above_u16', argtypes = ['const uint16_t *', 'unsigned', 'uint16_t', 'unsigned *'], aligned = True)
gen.add_feature(name='neon', description='ARM NEON')

View File

@ -305,6 +305,7 @@ static void showDSP()
SHOW(magnitude_sc16);
SHOW(magnitude_sc16q11);
SHOW(mean_power_u16);
SHOW(count_above_u16);
#undef SHOW

View File

@ -29,3 +29,9 @@ mean_power_u16 u64_generic
mean_power_u16_aligned u32_armv7a_neon_vfpv4_aligned # 44929 ns/call
mean_power_u16_aligned u64_generic # 934445 ns/call
count_above_u16 neon_armv7a_neon_vfpv4 # 35 ns/call
count_above_u16 generic_generic # 178 ns/call
count_above_u16_aligned neon_armv7a_neon_vfpv4 # 34 ns/call
count_above_u16_aligned generic_generic # 179 ns/call

View File

@ -14,3 +14,6 @@ magnitude_uc8_aligned lookup_unroll_4_generic
mean_power_u16 u32_generic
mean_power_u16_aligned u32_generic
count_above_u16 generic_generic
count_above_u16_aligned generic_generic

View File

@ -29,3 +29,9 @@ mean_power_u16 u32_generic
mean_power_u16_aligned u32_x86_avx2_aligned # 11572 ns/call
mean_power_u16_aligned u32_generic # 18207 ns/call
count_above_u16 generic_x86_avx2 # 20 ns/call
count_above_u16 generic_generic # 30 ns/call
count_above_u16_aligned generic_x86_avx2_aligned # 15 ns/call
count_above_u16_aligned generic_generic # 31 ns/call

View File

@ -88,3 +88,10 @@ mean_power_u16_aligned u64_x86_avx2_aligned
mean_power_u16_aligned u64_x86_avx2 # 31283 ns/call
mean_power_u16_aligned u64_generic # 39639 ns/call
mean_power_u16_aligned float_generic # 105615 ns/call
count_above_u16 generic_x86_avx2 # 20 ns/call
count_above_u16 generic_generic # 30 ns/call
count_above_u16_aligned generic_x86_avx2_aligned # 15 ns/call
count_above_u16_aligned generic_x86_avx2 # 19 ns/call
count_above_u16_aligned generic_generic # 31 ns/call

View File

@ -105,3 +105,13 @@ mean_power_u16_aligned float_armv7a_neon_vfpv4
mean_power_u16_aligned u64_generic # 131637 ns/call
mean_power_u16_aligned u32_generic # 132092 ns/call
mean_power_u16_aligned float_generic # 187127 ns/call
count_above_u16 neon_armv7a_neon_vfpv4 # 35 ns/call
count_above_u16 generic_armv7a_neon_vfpv4 # 56 ns/call
count_above_u16 generic_generic # 178 ns/call
count_above_u16_aligned neon_armv7a_neon_vfpv4_aligned # 34 ns/call
count_above_u16_aligned neon_armv7a_neon_vfpv4 # 34 ns/call
count_above_u16_aligned generic_armv7a_neon_vfpv4_aligned # 53 ns/call
count_above_u16_aligned generic_armv7a_neon_vfpv4 # 53 ns/call
count_above_u16_aligned generic_generic # 179 ns/call