Starch config: Add aarch64
* Added aarch64 to dsp/starchgen.py and Makefile.
* Regenerated files.
This commit is contained in:
parent
1b1f9de119
commit
1b0bcefae6
26
Makefile
26
Makefile
|
|
@ -142,19 +142,23 @@ ifneq ($(CPUFEATURES),yes)
|
|||
# need to be able to detect CPU features at runtime to enable any non-standard compiler flags
|
||||
STARCH_MIX := generic
|
||||
CPPFLAGS += -DSTARCH_MIX_GENERIC
|
||||
else ifeq ($(ARCH),x86_64)
|
||||
# AVX, AVX2
|
||||
STARCH_MIX := x86
|
||||
CPPFLAGS += -DSTARCH_MIX_X86
|
||||
else ifneq (,$(findstring arm,$(ARCH)))
|
||||
# ARMv7 NEON
|
||||
STARCH_MIX := arm
|
||||
CPPFLAGS += -DSTARCH_MIX_ARM
|
||||
else
|
||||
STARCH_MIX := generic
|
||||
CPPFLAGS += -DSTARCH_MIX_GENERIC
|
||||
ifeq ($(ARCH),x86_64)
|
||||
# AVX, AVX2
|
||||
STARCH_MIX := x86
|
||||
CPPFLAGS += -DSTARCH_MIX_X86
|
||||
else ifeq ($(findstring arm,$(ARCH)),arm)
|
||||
# ARMv7 NEON
|
||||
STARCH_MIX := arm
|
||||
CPPFLAGS += -DSTARCH_MIX_ARM
|
||||
else ifeq ($(findstring aarch,$(ARCH)),aarch)
|
||||
STARCH_MIX := aarch64
|
||||
CPPFLAGS += -DSTARCH_MIX_AARCH64
|
||||
else
|
||||
STARCH_MIX := generic
|
||||
CPPFLAGS += -DSTARCH_MIX_GENERIC
|
||||
endif
|
||||
endif
|
||||
|
||||
all: showconfig dump1090 view1090 starch-benchmark
|
||||
|
||||
STARCH_COMPILE := $(CC) $(CPPFLAGS) $(CFLAGS) -c
|
||||
|
|
|
|||
|
|
@ -1247,10 +1247,10 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
|
|||
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)
|
||||
|
||||
#include "../benchmark/magnitude_sc16_benchmark.c"
|
||||
#include "../benchmark/magnitude_uc8_benchmark.c"
|
||||
#include "../benchmark/magnitude_power_uc8_benchmark.c"
|
||||
#include "../benchmark/mean_power_u16_benchmark.c"
|
||||
#include "../benchmark/magnitude_sc16q11_benchmark.c"
|
||||
#include "../benchmark/magnitude_uc8_benchmark.c"
|
||||
#include "../benchmark/mean_power_u16_benchmark.c"
|
||||
#include "../benchmark/magnitude_power_uc8_benchmark.c"
|
||||
|
||||
#undef STARCH_ALIGNMENT
|
||||
#undef STARCH_ALIGNED
|
||||
|
|
@ -1275,10 +1275,10 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
|
|||
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)
|
||||
|
||||
#include "../benchmark/magnitude_sc16_benchmark.c"
|
||||
#include "../benchmark/magnitude_uc8_benchmark.c"
|
||||
#include "../benchmark/magnitude_power_uc8_benchmark.c"
|
||||
#include "../benchmark/mean_power_u16_benchmark.c"
|
||||
#include "../benchmark/magnitude_sc16q11_benchmark.c"
|
||||
#include "../benchmark/magnitude_uc8_benchmark.c"
|
||||
#include "../benchmark/mean_power_u16_benchmark.c"
|
||||
#include "../benchmark/magnitude_power_uc8_benchmark.c"
|
||||
|
||||
static void starch_benchmark_all_magnitude_uc8(void)
|
||||
{
|
||||
|
|
@ -1375,6 +1375,9 @@ static void starch_benchmark_usage(const char *argv0)
|
|||
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
|
||||
"armv7a_neon_vfpv4 "
|
||||
#endif
|
||||
#ifdef STARCH_FLAVOR_ARMV8_A
|
||||
"armv8_a "
|
||||
#endif
|
||||
#ifdef STARCH_FLAVOR_X86_AVX2
|
||||
"x86_avx2 "
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -89,6 +89,15 @@ starch_magnitude_uc8_regentry starch_magnitude_uc8_registry[] = {
|
|||
{ 6, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "lookup_armv8_a", "armv8_a", starch_magnitude_uc8_lookup_armv8_a, NULL },
|
||||
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 2, "lookup_unroll_4_armv8_a", "armv8_a", starch_magnitude_uc8_lookup_unroll_4_armv8_a, NULL },
|
||||
{ 3, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 4, "exact_armv8_a", "armv8_a", starch_magnitude_uc8_exact_armv8_a, NULL },
|
||||
{ 5, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
|
|
@ -174,6 +183,18 @@ starch_magnitude_uc8_aligned_regentry starch_magnitude_uc8_aligned_registry[] =
|
|||
{ 10, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "lookup_armv8_a", "armv8_a", starch_magnitude_uc8_lookup_armv8_a, NULL },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 2, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 3, "lookup_armv8_a_aligned", "armv8_a", starch_magnitude_uc8_aligned_lookup_armv8_a, NULL },
|
||||
{ 4, "lookup_unroll_4_armv8_a_aligned", "armv8_a", starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_a, NULL },
|
||||
{ 5, "lookup_unroll_4_armv8_a", "armv8_a", starch_magnitude_uc8_lookup_unroll_4_armv8_a, NULL },
|
||||
{ 6, "exact_armv8_a_aligned", "armv8_a", starch_magnitude_uc8_aligned_exact_armv8_a, NULL },
|
||||
{ 7, "exact_armv8_a", "armv8_a", starch_magnitude_uc8_exact_armv8_a, NULL },
|
||||
{ 8, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
|
|
@ -258,6 +279,15 @@ starch_magnitude_power_uc8_regentry starch_magnitude_power_uc8_registry[] = {
|
|||
{ 6, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "twopass_armv8_a", "armv8_a", starch_magnitude_power_uc8_twopass_armv8_a, NULL },
|
||||
{ 1, "lookup_armv8_a", "armv8_a", starch_magnitude_power_uc8_lookup_armv8_a, NULL },
|
||||
{ 2, "lookup_unroll_4_armv8_a", "armv8_a", starch_magnitude_power_uc8_lookup_unroll_4_armv8_a, NULL },
|
||||
{ 3, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
{ 4, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
{ 5, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "twopass_x86_avx2", "x86_avx2", starch_magnitude_power_uc8_twopass_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
|
|
@ -343,6 +373,18 @@ starch_magnitude_power_uc8_aligned_regentry starch_magnitude_power_uc8_aligned_r
|
|||
{ 10, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "twopass_armv8_a_aligned", "armv8_a", starch_magnitude_power_uc8_aligned_twopass_armv8_a, NULL },
|
||||
{ 1, "lookup_armv8_a_aligned", "armv8_a", starch_magnitude_power_uc8_aligned_lookup_armv8_a, NULL },
|
||||
{ 2, "lookup_unroll_4_armv8_a_aligned", "armv8_a", starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_a, NULL },
|
||||
{ 3, "twopass_armv8_a", "armv8_a", starch_magnitude_power_uc8_twopass_armv8_a, NULL },
|
||||
{ 4, "lookup_armv8_a", "armv8_a", starch_magnitude_power_uc8_lookup_armv8_a, NULL },
|
||||
{ 5, "lookup_unroll_4_armv8_a", "armv8_a", starch_magnitude_power_uc8_lookup_unroll_4_armv8_a, NULL },
|
||||
{ 6, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
{ 7, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
{ 8, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "twopass_x86_avx2_aligned", "x86_avx2", starch_magnitude_power_uc8_aligned_twopass_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
|
|
@ -424,6 +466,13 @@ starch_magnitude_sc16_regentry starch_magnitude_sc16_registry[] = {
|
|||
{ 4, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "exact_u32_armv8_a", "armv8_a", starch_magnitude_sc16_exact_u32_armv8_a, NULL },
|
||||
{ 1, "exact_float_armv8_a", "armv8_a", starch_magnitude_sc16_exact_float_armv8_a, NULL },
|
||||
{ 2, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
{ 3, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "exact_float_x86_avx2", "x86_avx2", starch_magnitude_sc16_exact_float_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
|
|
@ -503,6 +552,15 @@ starch_magnitude_sc16_aligned_regentry starch_magnitude_sc16_aligned_registry[]
|
|||
{ 7, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "exact_u32_armv8_a_aligned", "armv8_a", starch_magnitude_sc16_aligned_exact_u32_armv8_a, NULL },
|
||||
{ 1, "exact_float_armv8_a_aligned", "armv8_a", starch_magnitude_sc16_aligned_exact_float_armv8_a, NULL },
|
||||
{ 2, "exact_u32_armv8_a", "armv8_a", starch_magnitude_sc16_exact_u32_armv8_a, NULL },
|
||||
{ 3, "exact_float_armv8_a", "armv8_a", starch_magnitude_sc16_exact_float_armv8_a, NULL },
|
||||
{ 4, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
{ 5, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "exact_float_x86_avx2_aligned", "x86_avx2", starch_magnitude_sc16_aligned_exact_float_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
|
|
@ -587,6 +645,17 @@ starch_magnitude_sc16q11_regentry starch_magnitude_sc16q11_registry[] = {
|
|||
{ 8, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "exact_u32_armv8_a", "armv8_a", starch_magnitude_sc16q11_exact_u32_armv8_a, NULL },
|
||||
{ 1, "exact_float_armv8_a", "armv8_a", starch_magnitude_sc16q11_exact_float_armv8_a, NULL },
|
||||
{ 2, "11bit_table_armv8_a", "armv8_a", starch_magnitude_sc16q11_11bit_table_armv8_a, NULL },
|
||||
{ 3, "12bit_table_armv8_a", "armv8_a", starch_magnitude_sc16q11_12bit_table_armv8_a, NULL },
|
||||
{ 4, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
|
||||
{ 5, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
{ 6, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
|
||||
{ 7, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "exact_float_x86_avx2", "x86_avx2", starch_magnitude_sc16q11_exact_float_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
|
|
@ -678,6 +747,21 @@ starch_magnitude_sc16q11_aligned_regentry starch_magnitude_sc16q11_aligned_regis
|
|||
{ 13, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "exact_u32_armv8_a_aligned", "armv8_a", starch_magnitude_sc16q11_aligned_exact_u32_armv8_a, NULL },
|
||||
{ 1, "exact_float_armv8_a_aligned", "armv8_a", starch_magnitude_sc16q11_aligned_exact_float_armv8_a, NULL },
|
||||
{ 2, "11bit_table_armv8_a_aligned", "armv8_a", starch_magnitude_sc16q11_aligned_11bit_table_armv8_a, NULL },
|
||||
{ 3, "12bit_table_armv8_a_aligned", "armv8_a", starch_magnitude_sc16q11_aligned_12bit_table_armv8_a, NULL },
|
||||
{ 4, "exact_u32_armv8_a", "armv8_a", starch_magnitude_sc16q11_exact_u32_armv8_a, NULL },
|
||||
{ 5, "exact_float_armv8_a", "armv8_a", starch_magnitude_sc16q11_exact_float_armv8_a, NULL },
|
||||
{ 6, "11bit_table_armv8_a", "armv8_a", starch_magnitude_sc16q11_11bit_table_armv8_a, NULL },
|
||||
{ 7, "12bit_table_armv8_a", "armv8_a", starch_magnitude_sc16q11_12bit_table_armv8_a, NULL },
|
||||
{ 8, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
|
||||
{ 9, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
{ 10, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
|
||||
{ 11, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "exact_float_x86_avx2_aligned", "x86_avx2", starch_magnitude_sc16q11_aligned_exact_float_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
|
|
@ -765,6 +849,15 @@ starch_mean_power_u16_regentry starch_mean_power_u16_registry[] = {
|
|||
{ 6, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "float_armv8_a", "armv8_a", starch_mean_power_u16_float_armv8_a, NULL },
|
||||
{ 1, "u32_armv8_a", "armv8_a", starch_mean_power_u16_u32_armv8_a, NULL },
|
||||
{ 2, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
{ 3, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
|
||||
{ 4, "u64_armv8_a", "armv8_a", starch_mean_power_u16_u64_armv8_a, NULL },
|
||||
{ 5, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "u32_x86_avx2", "x86_avx2", starch_mean_power_u16_u32_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
|
|
@ -850,6 +943,18 @@ starch_mean_power_u16_aligned_regentry starch_mean_power_u16_aligned_registry[]
|
|||
{ 10, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "u32_armv8_a", "armv8_a", starch_mean_power_u16_u32_armv8_a, NULL },
|
||||
{ 1, "u32_armv8_a_aligned", "armv8_a", starch_mean_power_u16_aligned_u32_armv8_a, NULL },
|
||||
{ 2, "u64_armv8_a", "armv8_a", starch_mean_power_u16_u64_armv8_a, NULL },
|
||||
{ 3, "u64_armv8_a_aligned", "armv8_a", starch_mean_power_u16_aligned_u64_armv8_a, NULL },
|
||||
{ 4, "float_armv8_a", "armv8_a", starch_mean_power_u16_float_armv8_a, NULL },
|
||||
{ 5, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
{ 6, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
|
||||
{ 7, "float_armv8_a_aligned", "armv8_a", starch_mean_power_u16_aligned_float_armv8_a, NULL },
|
||||
{ 8, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "u32_x86_avx2_aligned", "x86_avx2", starch_mean_power_u16_aligned_u32_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
|
|
|
|||
|
|
@ -14,11 +14,11 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv7a_neon_vfpv4
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/mean_power_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
||||
#undef STARCH_ALIGNMENT
|
||||
|
|
@ -33,9 +33,9 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv7a_neon_vfpv4
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/mean_power_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
/* starch generated code. Do not edit. */
|
||||
|
||||
#define STARCH_FLAVOR_ARMV8_A
|
||||
|
||||
#include "starch.h"
|
||||
|
||||
#undef STARCH_ALIGNMENT
|
||||
|
||||
#define STARCH_ALIGNMENT 1
|
||||
#define STARCH_ALIGNED(_ptr) (_ptr)
|
||||
#define STARCH_SYMBOL(_name) starch_ ## _name ## _ ## armv8_a
|
||||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv8_a
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
||||
#undef STARCH_ALIGNMENT
|
||||
#undef STARCH_ALIGNED
|
||||
#undef STARCH_SYMBOL
|
||||
#undef STARCH_IMPL
|
||||
#undef STARCH_IMPL_REQUIRES
|
||||
|
||||
#define STARCH_ALIGNMENT STARCH_MIX_ALIGNMENT
|
||||
#define STARCH_ALIGNED(_ptr) (__builtin_assume_aligned((_ptr), STARCH_MIX_ALIGNMENT))
|
||||
#define STARCH_SYMBOL(_name) starch_ ## _name ## _aligned_ ## armv8_a
|
||||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv8_a
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
|
@ -13,9 +13,9 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## generic
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/mean_power_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
|
|
|||
|
|
@ -13,11 +13,11 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## x86_avx2
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/mean_power_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
||||
#undef STARCH_ALIGNMENT
|
||||
|
|
@ -32,9 +32,9 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## x86_avx2
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/mean_power_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
|
|
|||
|
|
@ -21,19 +21,19 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_ARM
|
||||
|
||||
|
||||
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) -march=armv7-a+neon-vfpv4 -mfpu=neon-vfpv4 -ffast-math dsp/generated/flavor.armv7a_neon_vfpv4.c -o dsp/generated/flavor.armv7a_neon_vfpv4.o
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.armv7a_neon_vfpv4.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -21,16 +21,16 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_GENERIC
|
||||
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -21,19 +21,19 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_X86
|
||||
|
||||
|
||||
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) -mavx2 -ffast-math dsp/generated/flavor.x86_avx2.c -o dsp/generated/flavor.x86_avx2.o
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.x86_avx2.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -19,6 +19,13 @@
|
|||
#define STARCH_MIX_ALIGNMENT 16
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
/* AARCH64 */
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
#define STARCH_FLAVOR_ARMV8_A
|
||||
#define STARCH_FLAVOR_GENERIC
|
||||
#define STARCH_MIX_ALIGNMENT 32
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
/* x64 */
|
||||
#ifdef STARCH_MIX_X86
|
||||
#define STARCH_FLAVOR_X86_AVX2
|
||||
|
|
@ -191,35 +198,27 @@ void starch_mean_power_u16_aligned_set_wisdom( const char * const * received_wis
|
|||
/* flavors and prototypes */
|
||||
|
||||
#ifdef STARCH_FLAVOR_GENERIC
|
||||
void starch_mean_power_u16_float_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_power_uc8_twopass_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_11bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_12bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_mean_power_u16_float_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
#endif /* STARCH_FLAVOR_GENERIC */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
||||
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
|
||||
int cpu_supports_armv7_neon_vfpv4 (void);
|
||||
void starch_mean_power_u16_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_power_uc8_twopass_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_twopass_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
|
|
@ -228,14 +227,12 @@ void starch_magnitude_power_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t
|
|||
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
|
@ -246,36 +243,73 @@ void starch_magnitude_sc16q11_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg
|
|||
void starch_magnitude_sc16q11_aligned_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_mean_power_u16_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
#endif /* STARCH_FLAVOR_ARMV7A_NEON_VFPV4 */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
||||
#ifdef STARCH_FLAVOR_ARMV8_A
|
||||
void starch_magnitude_power_uc8_twopass_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_twopass_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_lookup_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_unroll_4_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_sc16_exact_u32_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_exact_u32_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_exact_float_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_11bit_table_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_11bit_table_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_12bit_table_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_12bit_table_armv8_a ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_armv8_a ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_mean_power_u16_float_armv8_a ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_armv8_a ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_armv8_a ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_armv8_a ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_armv8_a ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_armv8_a ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
#endif /* STARCH_FLAVOR_ARMV8_A */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
||||
#ifdef STARCH_FLAVOR_X86_AVX2
|
||||
int cpu_supports_avx2 (void);
|
||||
void starch_mean_power_u16_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_power_uc8_twopass_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_twopass_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
|
@ -284,10 +318,18 @@ void starch_magnitude_sc16q11_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16
|
|||
void starch_magnitude_sc16q11_aligned_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_mean_power_u16_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
#endif /* STARCH_FLAVOR_X86_AVX2 */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
|
|
|||
|
|
@ -32,6 +32,11 @@ gen.add_flavor(name = 'armv7a_neon_vfpv4',
|
|||
features = ['neon'],
|
||||
test_function = 'cpu_supports_armv7_neon_vfpv4',
|
||||
alignment = 16)
|
||||
gen.add_flavor(name = 'armv8_a',
|
||||
description = 'ARMv8-A',
|
||||
compile_flags = ['-ffast-math'],
|
||||
features = [],
|
||||
alignment = 32)
|
||||
gen.add_flavor(name = 'x86_avx2',
|
||||
description = 'x86 with AVX2',
|
||||
compile_flags = ['-mavx2', '-ffast-math'],
|
||||
|
|
@ -48,6 +53,11 @@ gen.add_mix(name = 'arm',
|
|||
flavors = ['armv7a_neon_vfpv4', 'generic'],
|
||||
wisdom_file = 'wisdom.arm')
|
||||
|
||||
gen.add_mix(name = 'aarch64',
|
||||
description = 'AARCH64',
|
||||
flavors = ['armv8_a', 'generic'],
|
||||
wisdom_file = 'wisdom.aarch64')
|
||||
|
||||
gen.add_mix(name = 'x86',
|
||||
description = 'x64',
|
||||
flavors = ['x86_avx2', 'generic'],
|
||||
|
|
|
|||
Loading…
Reference in New Issue