Starch config: Add aarch64

* Added aarch64 to dsp/starchgen.py and Makefile.
* Regenerated files
This commit is contained in:
George Joseph 2021-02-07 17:07:17 -07:00
parent 1b1f9de119
commit 1b0bcefae6
12 changed files with 293 additions and 89 deletions

View File

@ -142,19 +142,23 @@ ifneq ($(CPUFEATURES),yes)
# need to be able to detect CPU features at runtime to enable any non-standard compiler flags
STARCH_MIX := generic
CPPFLAGS += -DSTARCH_MIX_GENERIC
else ifeq ($(ARCH),x86_64)
# AVX, AVX2
STARCH_MIX := x86
CPPFLAGS += -DSTARCH_MIX_X86
else ifneq (,$(findstring arm,$(ARCH)))
# ARMv7 NEON
STARCH_MIX := arm
CPPFLAGS += -DSTARCH_MIX_ARM
else
STARCH_MIX := generic
CPPFLAGS += -DSTARCH_MIX_GENERIC
# Select the starch mix (which flavor set gets compiled) from $(ARCH).
# Exactly one branch runs; each sets STARCH_MIX and the matching
# -DSTARCH_MIX_* preprocessor define.
ifeq ($(ARCH),x86_64)
  # AVX, AVX2
  STARCH_MIX := x86
  CPPFLAGS += -DSTARCH_MIX_X86
else ifneq (,$(findstring aarch,$(ARCH))$(findstring arm64,$(ARCH)))
  # 64-bit ARM (ARMv8-A). Some platforms report it as "arm64" rather than
  # "aarch64". This test must come before the generic "arm" substring match
  # below: "arm64" contains "arm" and would otherwise select the ARMv7 mix.
  STARCH_MIX := aarch64
  CPPFLAGS += -DSTARCH_MIX_AARCH64
else ifeq ($(findstring arm,$(ARCH)),arm)
  # ARMv7 NEON
  STARCH_MIX := arm
  CPPFLAGS += -DSTARCH_MIX_ARM
else
  # Unknown architecture: fall back to the portable flavor only.
  STARCH_MIX := generic
  CPPFLAGS += -DSTARCH_MIX_GENERIC
endif
endif
all: showconfig dump1090 view1090 starch-benchmark
STARCH_COMPILE := $(CC) $(CPPFLAGS) $(CFLAGS) -c

View File

@ -1247,10 +1247,10 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)
#include "../benchmark/magnitude_sc16_benchmark.c"
#include "../benchmark/magnitude_uc8_benchmark.c"
#include "../benchmark/magnitude_power_uc8_benchmark.c"
#include "../benchmark/mean_power_u16_benchmark.c"
#include "../benchmark/magnitude_sc16q11_benchmark.c"
#include "../benchmark/magnitude_uc8_benchmark.c"
#include "../benchmark/mean_power_u16_benchmark.c"
#include "../benchmark/magnitude_power_uc8_benchmark.c"
#undef STARCH_ALIGNMENT
#undef STARCH_ALIGNED
@ -1275,10 +1275,10 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)
#include "../benchmark/magnitude_sc16_benchmark.c"
#include "../benchmark/magnitude_uc8_benchmark.c"
#include "../benchmark/magnitude_power_uc8_benchmark.c"
#include "../benchmark/mean_power_u16_benchmark.c"
#include "../benchmark/magnitude_sc16q11_benchmark.c"
#include "../benchmark/magnitude_uc8_benchmark.c"
#include "../benchmark/mean_power_u16_benchmark.c"
#include "../benchmark/magnitude_power_uc8_benchmark.c"
static void starch_benchmark_all_magnitude_uc8(void)
{
@ -1375,6 +1375,9 @@ static void starch_benchmark_usage(const char *argv0)
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
"armv7a_neon_vfpv4 "
#endif
#ifdef STARCH_FLAVOR_ARMV8_A
"armv8_a "
#endif
#ifdef STARCH_FLAVOR_X86_AVX2
"x86_avx2 "
#endif

View File

@ -89,6 +89,15 @@ starch_magnitude_uc8_regentry starch_magnitude_uc8_registry[] = {
{ 6, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "lookup_armv8_a", "armv8_a", starch_magnitude_uc8_lookup_armv8_a, NULL },
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 2, "lookup_unroll_4_armv8_a", "armv8_a", starch_magnitude_uc8_lookup_unroll_4_armv8_a, NULL },
{ 3, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 4, "exact_armv8_a", "armv8_a", starch_magnitude_uc8_exact_armv8_a, NULL },
{ 5, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
@ -174,6 +183,18 @@ starch_magnitude_uc8_aligned_regentry starch_magnitude_uc8_aligned_registry[] =
{ 10, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "lookup_armv8_a", "armv8_a", starch_magnitude_uc8_lookup_armv8_a, NULL },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 3, "lookup_armv8_a_aligned", "armv8_a", starch_magnitude_uc8_aligned_lookup_armv8_a, NULL },
{ 4, "lookup_unroll_4_armv8_a_aligned", "armv8_a", starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_a, NULL },
{ 5, "lookup_unroll_4_armv8_a", "armv8_a", starch_magnitude_uc8_lookup_unroll_4_armv8_a, NULL },
{ 6, "exact_armv8_a_aligned", "armv8_a", starch_magnitude_uc8_aligned_exact_armv8_a, NULL },
{ 7, "exact_armv8_a", "armv8_a", starch_magnitude_uc8_exact_armv8_a, NULL },
{ 8, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
@ -258,6 +279,15 @@ starch_magnitude_power_uc8_regentry starch_magnitude_power_uc8_registry[] = {
{ 6, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "twopass_armv8_a", "armv8_a", starch_magnitude_power_uc8_twopass_armv8_a, NULL },
{ 1, "lookup_armv8_a", "armv8_a", starch_magnitude_power_uc8_lookup_armv8_a, NULL },
{ 2, "lookup_unroll_4_armv8_a", "armv8_a", starch_magnitude_power_uc8_lookup_unroll_4_armv8_a, NULL },
{ 3, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
{ 4, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
{ 5, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "twopass_x86_avx2", "x86_avx2", starch_magnitude_power_uc8_twopass_x86_avx2, cpu_supports_avx2 },
{ 1, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
@ -343,6 +373,18 @@ starch_magnitude_power_uc8_aligned_regentry starch_magnitude_power_uc8_aligned_r
{ 10, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "twopass_armv8_a_aligned", "armv8_a", starch_magnitude_power_uc8_aligned_twopass_armv8_a, NULL },
{ 1, "lookup_armv8_a_aligned", "armv8_a", starch_magnitude_power_uc8_aligned_lookup_armv8_a, NULL },
{ 2, "lookup_unroll_4_armv8_a_aligned", "armv8_a", starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_a, NULL },
{ 3, "twopass_armv8_a", "armv8_a", starch_magnitude_power_uc8_twopass_armv8_a, NULL },
{ 4, "lookup_armv8_a", "armv8_a", starch_magnitude_power_uc8_lookup_armv8_a, NULL },
{ 5, "lookup_unroll_4_armv8_a", "armv8_a", starch_magnitude_power_uc8_lookup_unroll_4_armv8_a, NULL },
{ 6, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
{ 7, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
{ 8, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "twopass_x86_avx2_aligned", "x86_avx2", starch_magnitude_power_uc8_aligned_twopass_x86_avx2, cpu_supports_avx2 },
{ 1, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
@ -424,6 +466,13 @@ starch_magnitude_sc16_regentry starch_magnitude_sc16_registry[] = {
{ 4, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "exact_u32_armv8_a", "armv8_a", starch_magnitude_sc16_exact_u32_armv8_a, NULL },
{ 1, "exact_float_armv8_a", "armv8_a", starch_magnitude_sc16_exact_float_armv8_a, NULL },
{ 2, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
{ 3, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "exact_float_x86_avx2", "x86_avx2", starch_magnitude_sc16_exact_float_x86_avx2, cpu_supports_avx2 },
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
@ -503,6 +552,15 @@ starch_magnitude_sc16_aligned_regentry starch_magnitude_sc16_aligned_registry[]
{ 7, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "exact_u32_armv8_a_aligned", "armv8_a", starch_magnitude_sc16_aligned_exact_u32_armv8_a, NULL },
{ 1, "exact_float_armv8_a_aligned", "armv8_a", starch_magnitude_sc16_aligned_exact_float_armv8_a, NULL },
{ 2, "exact_u32_armv8_a", "armv8_a", starch_magnitude_sc16_exact_u32_armv8_a, NULL },
{ 3, "exact_float_armv8_a", "armv8_a", starch_magnitude_sc16_exact_float_armv8_a, NULL },
{ 4, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
{ 5, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "exact_float_x86_avx2_aligned", "x86_avx2", starch_magnitude_sc16_aligned_exact_float_x86_avx2, cpu_supports_avx2 },
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
@ -587,6 +645,17 @@ starch_magnitude_sc16q11_regentry starch_magnitude_sc16q11_registry[] = {
{ 8, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "exact_u32_armv8_a", "armv8_a", starch_magnitude_sc16q11_exact_u32_armv8_a, NULL },
{ 1, "exact_float_armv8_a", "armv8_a", starch_magnitude_sc16q11_exact_float_armv8_a, NULL },
{ 2, "11bit_table_armv8_a", "armv8_a", starch_magnitude_sc16q11_11bit_table_armv8_a, NULL },
{ 3, "12bit_table_armv8_a", "armv8_a", starch_magnitude_sc16q11_12bit_table_armv8_a, NULL },
{ 4, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
{ 5, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
{ 6, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
{ 7, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "exact_float_x86_avx2", "x86_avx2", starch_magnitude_sc16q11_exact_float_x86_avx2, cpu_supports_avx2 },
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
@ -678,6 +747,21 @@ starch_magnitude_sc16q11_aligned_regentry starch_magnitude_sc16q11_aligned_regis
{ 13, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "exact_u32_armv8_a_aligned", "armv8_a", starch_magnitude_sc16q11_aligned_exact_u32_armv8_a, NULL },
{ 1, "exact_float_armv8_a_aligned", "armv8_a", starch_magnitude_sc16q11_aligned_exact_float_armv8_a, NULL },
{ 2, "11bit_table_armv8_a_aligned", "armv8_a", starch_magnitude_sc16q11_aligned_11bit_table_armv8_a, NULL },
{ 3, "12bit_table_armv8_a_aligned", "armv8_a", starch_magnitude_sc16q11_aligned_12bit_table_armv8_a, NULL },
{ 4, "exact_u32_armv8_a", "armv8_a", starch_magnitude_sc16q11_exact_u32_armv8_a, NULL },
{ 5, "exact_float_armv8_a", "armv8_a", starch_magnitude_sc16q11_exact_float_armv8_a, NULL },
{ 6, "11bit_table_armv8_a", "armv8_a", starch_magnitude_sc16q11_11bit_table_armv8_a, NULL },
{ 7, "12bit_table_armv8_a", "armv8_a", starch_magnitude_sc16q11_12bit_table_armv8_a, NULL },
{ 8, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
{ 9, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
{ 10, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
{ 11, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "exact_float_x86_avx2_aligned", "x86_avx2", starch_magnitude_sc16q11_aligned_exact_float_x86_avx2, cpu_supports_avx2 },
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
@ -765,6 +849,15 @@ starch_mean_power_u16_regentry starch_mean_power_u16_registry[] = {
{ 6, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "float_armv8_a", "armv8_a", starch_mean_power_u16_float_armv8_a, NULL },
{ 1, "u32_armv8_a", "armv8_a", starch_mean_power_u16_u32_armv8_a, NULL },
{ 2, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
{ 3, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
{ 4, "u64_armv8_a", "armv8_a", starch_mean_power_u16_u64_armv8_a, NULL },
{ 5, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "u32_x86_avx2", "x86_avx2", starch_mean_power_u16_u32_x86_avx2, cpu_supports_avx2 },
{ 1, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
@ -850,6 +943,18 @@ starch_mean_power_u16_aligned_regentry starch_mean_power_u16_aligned_registry[]
{ 10, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "u32_armv8_a", "armv8_a", starch_mean_power_u16_u32_armv8_a, NULL },
{ 1, "u32_armv8_a_aligned", "armv8_a", starch_mean_power_u16_aligned_u32_armv8_a, NULL },
{ 2, "u64_armv8_a", "armv8_a", starch_mean_power_u16_u64_armv8_a, NULL },
{ 3, "u64_armv8_a_aligned", "armv8_a", starch_mean_power_u16_aligned_u64_armv8_a, NULL },
{ 4, "float_armv8_a", "armv8_a", starch_mean_power_u16_float_armv8_a, NULL },
{ 5, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
{ 6, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
{ 7, "float_armv8_a_aligned", "armv8_a", starch_mean_power_u16_aligned_float_armv8_a, NULL },
{ 8, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "u32_x86_avx2_aligned", "x86_avx2", starch_mean_power_u16_aligned_u32_x86_avx2, cpu_supports_avx2 },
{ 1, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },

View File

@ -14,11 +14,11 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv7a_neon_vfpv4
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/mean_power_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"
#undef STARCH_ALIGNMENT
@ -33,9 +33,9 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv7a_neon_vfpv4
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/mean_power_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"

View File

@ -0,0 +1,40 @@
/* starch generated code. Do not edit. */
/*
 * armv8_a flavor translation unit.
 *
 * The same set of implementation sources is included twice, each time under
 * a different set of STARCH_* macro definitions:
 *   1. an unaligned pass, producing symbols named starch_<function>_<impl>_armv8_a
 *   2. an aligned pass, producing symbols named starch_<function>_aligned_<impl>_armv8_a
 * NOTE(review): presumably the included ../impl/ sources name their functions via
 * STARCH_IMPL / STARCH_IMPL_REQUIRES / STARCH_SYMBOL; they are not visible here.
 */
#define STARCH_FLAVOR_ARMV8_A
#include "starch.h"
/* Pass 1: unaligned variants. Alignment is 1 and STARCH_ALIGNED() returns the pointer unchanged. */
#undef STARCH_ALIGNMENT
#define STARCH_ALIGNMENT 1
#define STARCH_ALIGNED(_ptr) (_ptr)
#define STARCH_SYMBOL(_name) starch_ ## _name ## _ ## armv8_a
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv8_a
/* The _feature argument is ignored for this flavor: the name is the same as plain STARCH_IMPL. */
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"
/* Drop the pass-1 definitions so they can be redefined for the aligned pass. */
#undef STARCH_ALIGNMENT
#undef STARCH_ALIGNED
#undef STARCH_SYMBOL
#undef STARCH_IMPL
#undef STARCH_IMPL_REQUIRES
/*
 * Pass 2: aligned variants. STARCH_ALIGNED() wraps the pointer in
 * __builtin_assume_aligned with the mix-wide STARCH_MIX_ALIGNMENT, and the
 * generated names gain an "_aligned_" component.
 */
#define STARCH_ALIGNMENT STARCH_MIX_ALIGNMENT
#define STARCH_ALIGNED(_ptr) (__builtin_assume_aligned((_ptr), STARCH_MIX_ALIGNMENT))
#define STARCH_SYMBOL(_name) starch_ ## _name ## _aligned_ ## armv8_a
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv8_a
/* As in pass 1, the _feature argument is ignored. */
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"

View File

@ -13,9 +13,9 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## generic
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/mean_power_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"

View File

@ -13,11 +13,11 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## x86_avx2
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/mean_power_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"
#undef STARCH_ALIGNMENT
@ -32,9 +32,9 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## x86_avx2
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/mean_power_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"

View File

@ -21,19 +21,19 @@
STARCH_CFLAGS := -DSTARCH_MIX_ARM
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) -march=armv7-a+neon-vfpv4 -mfpu=neon-vfpv4 -ffast-math dsp/generated/flavor.armv7a_neon_vfpv4.c -o dsp/generated/flavor.armv7a_neon_vfpv4.o
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.armv7a_neon_vfpv4.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -21,16 +21,16 @@
STARCH_CFLAGS := -DSTARCH_MIX_GENERIC
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -21,19 +21,19 @@
STARCH_CFLAGS := -DSTARCH_MIX_X86
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) -mavx2 -ffast-math dsp/generated/flavor.x86_avx2.c -o dsp/generated/flavor.x86_avx2.o
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.x86_avx2.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -19,6 +19,13 @@
#define STARCH_MIX_ALIGNMENT 16
#endif /* STARCH_MIX_ARM */
/*
 * AARCH64 mix: selected by defining STARCH_MIX_AARCH64.
 * Enables the armv8_a and generic flavors and sets STARCH_MIX_ALIGNMENT,
 * the alignment value for this mix, to 32.
 */
#ifdef STARCH_MIX_AARCH64
#define STARCH_FLAVOR_ARMV8_A
#define STARCH_FLAVOR_GENERIC
#define STARCH_MIX_ALIGNMENT 32
#endif /* STARCH_MIX_AARCH64 */
/* x64 */
#ifdef STARCH_MIX_X86
#define STARCH_FLAVOR_X86_AVX2
@ -191,35 +198,27 @@ void starch_mean_power_u16_aligned_set_wisdom( const char * const * received_wis
/* flavors and prototypes */
#ifdef STARCH_FLAVOR_GENERIC
void starch_mean_power_u16_float_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_magnitude_power_uc8_twopass_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_11bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_12bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_mean_power_u16_float_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
#endif /* STARCH_FLAVOR_GENERIC */
int starch_read_wisdom (const char * path);
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
int cpu_supports_armv7_neon_vfpv4 (void);
void starch_mean_power_u16_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_magnitude_power_uc8_twopass_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_twopass_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
@ -228,14 +227,12 @@ void starch_magnitude_power_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
@ -246,36 +243,73 @@ void starch_magnitude_sc16q11_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg
void starch_magnitude_sc16q11_aligned_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_mean_power_u16_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
#endif /* STARCH_FLAVOR_ARMV7A_NEON_VFPV4 */
/* Prototype only; the definition is not part of this section.
 * NOTE(review): presumably reads a starch "wisdom" file located at `path`;
 * the meaning of the int result is not visible here - confirm against the definition. */
int starch_read_wisdom (const char * path);
#ifdef STARCH_FLAVOR_ARMV8_A
/* Prototypes for the "armv8_a" flavor; only visible when STARCH_FLAVOR_ARMV8_A
 * is defined.
 *
 * Naming pattern: starch_<function>[_aligned]_<implementation>_armv8_a.
 * Every implementation is declared twice with identical signatures, once plain
 * and once with an `_aligned` infix.
 * NOTE(review): the `_aligned` variants presumably require buffers aligned to
 * the flavor's configured alignment - confirm against the generator config.
 * NOTE(review): this looks like generator output; prefer regenerating over
 * hand-editing - confirm.
 *
 * Argument names are positional placeholders (arg0, arg1, ...).
 * NOTE(review): for the three-argument magnitude_* functions, arg0 is
 * presumably the input samples, arg1 the uint16_t output and arg2 the element
 * count - confirm against the implementations.
 *
 * Unlike the ARMv7-A NEON section, no `neon_*` implementations are declared
 * for this flavor. */

/* magnitude_power_uc8: uc8_t input, uint16_t output, plus two double
 * out-pointers (arg3, arg4). */
void starch_magnitude_power_uc8_twopass_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_twopass_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_lookup_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_unroll_4_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
/* magnitude_sc16: sc16_t input, uint16_t output. */
void starch_magnitude_sc16_exact_u32_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
/* magnitude_sc16q11: same signature as magnitude_sc16; adds table-based
 * implementations (11bit_table, 12bit_table). */
void starch_magnitude_sc16q11_exact_u32_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_exact_u32_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_exact_float_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_11bit_table_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_11bit_table_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_12bit_table_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_12bit_table_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
/* magnitude_uc8: uc8_t input, uint16_t output. */
void starch_magnitude_uc8_lookup_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
/* mean_power_u16: uint16_t input with an unsigned second argument and two
 * double out-pointers (arg2, arg3); note the different argument order from the
 * magnitude_* families. */
void starch_mean_power_u16_float_armv8_a ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_armv8_a ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_armv8_a ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_armv8_a ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_armv8_a ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_armv8_a ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
#endif /* STARCH_FLAVOR_ARMV8_A */
/* Prototype only; the definition is not part of this section.
 * NOTE(review): presumably reads a starch "wisdom" file located at `path`;
 * the meaning of the int result is not visible here - confirm against the definition. */
int starch_read_wisdom (const char * path);
#ifdef STARCH_FLAVOR_X86_AVX2
/* Prototypes for the "x86_avx2" flavor; only visible when
 * STARCH_FLAVOR_X86_AVX2 is defined.
 *
 * Naming pattern: starch_<function>[_aligned]_<implementation>_x86_avx2, with
 * each implementation declared both plain and with an `_aligned` infix.
 *
 * NOTE(review): several prototypes below appear twice, and a line beginning
 * with '@' sits in the middle of the section. Both look like residue of a
 * rendered diff (removed and added lines shown together, plus a hunk header)
 * rather than real header content - confirm against the actual header.
 * Repeating an identical prototype is harmless in C; the '@' line is not C. */

/* NOTE(review): presumably the runtime check that gates use of this flavor;
 * the return-value convention is not visible here - confirm. */
int cpu_supports_avx2 (void);
void starch_mean_power_u16_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_magnitude_power_uc8_twopass_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_twopass_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
@ -284,10 +318,18 @@ void starch_magnitude_sc16q11_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16
void starch_magnitude_sc16q11_aligned_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_mean_power_u16_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
#endif /* STARCH_FLAVOR_X86_AVX2 */
/* Prototype only; the definition is not part of this section.
 * NOTE(review): presumably reads a starch "wisdom" file located at `path`;
 * the meaning of the int result is not visible here - confirm against the definition. */
int starch_read_wisdom (const char * path);

View File

@ -32,6 +32,11 @@ gen.add_flavor(name = 'armv7a_neon_vfpv4',
features = ['neon'],
test_function = 'cpu_supports_armv7_neon_vfpv4',
alignment = 16)
# Register the 'armv8_a' flavor with the generator.
# The only compile flag is -ffast-math; no architecture-selecting flag is passed.
gen.add_flavor(name = 'armv8_a',
description = 'ARMv8-A',
compile_flags = ['-ffast-math'],
# Empty feature list, whereas the armv7a_neon_vfpv4 flavor lists 'neon'.
# NOTE(review): presumably this keeps NEON-specific implementations out of
# this flavor - confirm against how the generator matches features.
features = [],
# No test_function is passed, unlike the armv7a_neon_vfpv4 flavor.
# NOTE(review): presumably the flavor is then treated as always usable at
# runtime - confirm against add_flavor's handling of a missing test_function.
# NOTE(review): alignment is presumably in bytes and applies to the
# `_aligned` variants; the armv7a flavor uses 16 - confirm that 32 is intended.
alignment = 32)
gen.add_flavor(name = 'x86_avx2',
description = 'x86 with AVX2',
compile_flags = ['-mavx2', '-ffast-math'],
@ -48,6 +53,11 @@ gen.add_mix(name = 'arm',
flavors = ['armv7a_neon_vfpv4', 'generic'],
wisdom_file = 'wisdom.arm')
# Register the 'aarch64' mix: the armv8_a flavor plus the generic flavor,
# using 'wisdom.aarch64' as its wisdom file.
# The Makefile sets STARCH_MIX := aarch64 (and -DSTARCH_MIX_AARCH64) when ARCH
# contains "aarch" and CPU feature detection is enabled.
# NOTE(review): the order of `flavors` presumably expresses preference
# (most specific first) - confirm against the generator.
gen.add_mix(name = 'aarch64',
description = 'AARCH64',
flavors = ['armv8_a', 'generic'],
wisdom_file = 'wisdom.aarch64')
gen.add_mix(name = 'x86',
description = 'x64',
flavors = ['x86_avx2', 'generic'],