Merge pull request #108 from gtjoseph/dev-fa-starch-aarch64

Starch config: Add aarch64
This commit is contained in:
Oliver Jowett 2021-02-09 14:08:52 +08:00 committed by GitHub
commit 5600d3fc71
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 602 additions and 89 deletions

View File

@ -141,19 +141,23 @@ ifneq ($(CPUFEATURES),yes)
# need to be able to detect CPU features at runtime to enable any non-standard compiler flags
STARCH_MIX := generic
CPPFLAGS += -DSTARCH_MIX_GENERIC
else ifeq ($(ARCH),x86_64)
# AVX, AVX2
STARCH_MIX := x86
CPPFLAGS += -DSTARCH_MIX_X86
else ifneq (,$(findstring arm,$(ARCH)))
# ARMv7 NEON
STARCH_MIX := arm
CPPFLAGS += -DSTARCH_MIX_ARM
else
STARCH_MIX := generic
CPPFLAGS += -DSTARCH_MIX_GENERIC
ifeq ($(ARCH),x86_64)
# AVX, AVX2
STARCH_MIX := x86
CPPFLAGS += -DSTARCH_MIX_X86
else ifeq ($(findstring arm,$(ARCH)),arm)
# ARMv7 NEON
STARCH_MIX := arm
CPPFLAGS += -DSTARCH_MIX_ARM
else ifeq ($(findstring aarch,$(ARCH)),aarch)
STARCH_MIX := aarch64
CPPFLAGS += -DSTARCH_MIX_AARCH64
else
STARCH_MIX := generic
CPPFLAGS += -DSTARCH_MIX_GENERIC
endif
endif
all: showconfig dump1090 view1090 starch-benchmark
STARCH_COMPILE := $(CC) $(CPPFLAGS) $(CFLAGS) -c

View File

@ -25,5 +25,9 @@ ifneq (,$(findstring arm,$(CPUFEATURES_ARCH)))
CPUFEATURES_OBJS += cpu_features/src/cpuinfo_arm.o
endif
# When CPUFEATURES_ARCH contains "aarch64", add the cpu_features AArch64
# detection object to the list of objects to build.
ifneq (,$(findstring aarch64,$(CPUFEATURES_ARCH)))
CPUFEATURES_OBJS += cpu_features/src/cpuinfo_aarch64.o
endif
$(CPUFEATURES_OBJS): override CFLAGS := $(CPUFEATURES_CFLAGS)
$(CPUFEATURES_OBJS): override CPPFLAGS := -Icpu_features/include

31
cpu.c
View File

@ -76,3 +76,34 @@ int cpu_supports_armv7_neon_vfpv4(void)
return 0;
#endif
}
//
// AARCH64
//
#ifdef CPU_FEATURES_ARCH_AARCH64
#include "cpuinfo_aarch64.h"
// Return a pointer to the AArch64 feature information.
//
// GetAarch64Info() is called only on the first invocation; its result is
// copied into function-local static storage and that same copy is handed
// back on every later call. The returned pointer must not be freed.
//
// NOTE(review): the first-call initialisation is not synchronised; confirm
// that the first call cannot race between threads.
static Aarch64Info *aarch64_info(void)
{
    static bool valid = false;   // set once the cache has been filled
    static Aarch64Info cache;

    if (!valid) {
        cache = GetAarch64Info();
        valid = true;
    }
    return &cache;
}
#endif
// Report the ARMv8 Advanced SIMD capability used to gate the
// armv8_neon_simd implementations.
//
// Returns the `asimd` feature flag from the cached Aarch64Info when this
// file is built with CPU_FEATURES_ARCH_AARCH64 defined; returns 0 in every
// other build.
int cpu_supports_armv8_simd(void)
{
#ifndef CPU_FEATURES_ARCH_AARCH64
    // AArch64 feature detection is not compiled in.
    return 0;
#else
    return aarch64_info()->features.asimd;
#endif
}

4
cpu.h
View File

@ -8,4 +8,8 @@ int cpu_supports_avx2(void);
// ARM
int cpu_supports_armv7_neon_vfpv4(void);
// AARCH64
// Returns the cpu_features `asimd` flag when cpu.c is built with
// CPU_FEATURES_ARCH_AARCH64 defined, and 0 otherwise.
int cpu_supports_armv8_simd(void);
// NOTE(review): no definition of this function appears next to
// cpu_supports_armv8_simd() in the cpu.c changes shown here; confirm it is
// defined somewhere before anything references it.
int cpu_supports_armv8_simd_sve(void);
#endif

View File

@ -1246,11 +1246,11 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
#define STARCH_BENCHMARK_ALLOC(_count, _type) ((_type *) starch_benchmark_aligned_alloc(1, alignof(_type), (_count) * sizeof(_type)))
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)
#include "../benchmark/magnitude_sc16_benchmark.c"
#include "../benchmark/magnitude_uc8_benchmark.c"
#include "../benchmark/magnitude_power_uc8_benchmark.c"
#include "../benchmark/mean_power_u16_benchmark.c"
#include "../benchmark/magnitude_sc16_benchmark.c"
#include "../benchmark/magnitude_sc16q11_benchmark.c"
#include "../benchmark/magnitude_uc8_benchmark.c"
#include "../benchmark/mean_power_u16_benchmark.c"
#undef STARCH_ALIGNMENT
#undef STARCH_ALIGNED
@ -1274,11 +1274,11 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
#define STARCH_BENCHMARK_ALLOC(_count, _type) ((_type *) starch_benchmark_aligned_alloc(STARCH_MIX_ALIGNMENT, alignof(_type), (_count) * sizeof(_type)))
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)
#include "../benchmark/magnitude_sc16_benchmark.c"
#include "../benchmark/magnitude_uc8_benchmark.c"
#include "../benchmark/magnitude_power_uc8_benchmark.c"
#include "../benchmark/mean_power_u16_benchmark.c"
#include "../benchmark/magnitude_sc16_benchmark.c"
#include "../benchmark/magnitude_sc16q11_benchmark.c"
#include "../benchmark/magnitude_uc8_benchmark.c"
#include "../benchmark/mean_power_u16_benchmark.c"
static void starch_benchmark_all_magnitude_uc8(void)
{
@ -1375,6 +1375,9 @@ static void starch_benchmark_usage(const char *argv0)
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
"armv7a_neon_vfpv4 "
#endif
#ifdef STARCH_FLAVOR_ARMV8_NEON_SIMD
"armv8_neon_simd "
#endif
#ifdef STARCH_FLAVOR_X86_AVX2
"x86_avx2 "
#endif

View File

@ -89,6 +89,16 @@ starch_magnitude_uc8_regentry starch_magnitude_uc8_registry[] = {
{ 6, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
{ 3, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "exact_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_exact_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
@ -174,6 +184,20 @@ starch_magnitude_uc8_aligned_regentry starch_magnitude_uc8_aligned_registry[] =
{ 10, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
{ 3, "lookup_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "lookup_unroll_4_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "exact_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_exact_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 8, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 9, "exact_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_exact_armv8_neon_simd, cpu_supports_armv8_simd },
{ 10, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
@ -258,6 +282,16 @@ starch_magnitude_power_uc8_regentry starch_magnitude_power_uc8_registry[] = {
{ 6, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "twopass_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
{ 5, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
{ 6, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "twopass_x86_avx2", "x86_avx2", starch_magnitude_power_uc8_twopass_x86_avx2, cpu_supports_avx2 },
{ 1, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
@ -343,6 +377,20 @@ starch_magnitude_power_uc8_aligned_regentry starch_magnitude_power_uc8_aligned_r
{ 10, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "twopass_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "lookup_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "lookup_unroll_4_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "twopass_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 8, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
{ 9, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
{ 10, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "twopass_x86_avx2_aligned", "x86_avx2", starch_magnitude_power_uc8_aligned_twopass_x86_avx2, cpu_supports_avx2 },
{ 1, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
@ -424,6 +472,14 @@ starch_magnitude_sc16_regentry starch_magnitude_sc16_registry[] = {
{ 4, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
{ 4, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "exact_float_x86_avx2", "x86_avx2", starch_magnitude_sc16_exact_float_x86_avx2, cpu_supports_avx2 },
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
@ -503,6 +559,17 @@ starch_magnitude_sc16_aligned_regentry starch_magnitude_sc16_aligned_registry[]
{ 7, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "exact_u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "exact_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
{ 7, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "exact_float_x86_avx2_aligned", "x86_avx2", starch_magnitude_sc16_aligned_exact_float_x86_avx2, cpu_supports_avx2 },
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
@ -587,6 +654,18 @@ starch_magnitude_sc16q11_regentry starch_magnitude_sc16q11_registry[] = {
{ 8, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "11bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "12bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
{ 6, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
{ 7, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
{ 8, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "exact_float_x86_avx2", "x86_avx2", starch_magnitude_sc16q11_exact_float_x86_avx2, cpu_supports_avx2 },
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
@ -678,6 +757,23 @@ starch_magnitude_sc16q11_aligned_regentry starch_magnitude_sc16q11_aligned_regis
{ 13, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "exact_u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 1, "exact_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 2, "11bit_table_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 3, "12bit_table_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "11bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 8, "12bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
{ 9, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
{ 10, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
{ 11, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
{ 12, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
{ 13, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "exact_float_x86_avx2_aligned", "x86_avx2", starch_magnitude_sc16q11_aligned_exact_float_x86_avx2, cpu_supports_avx2 },
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
@ -765,6 +861,16 @@ starch_mean_power_u16_regentry starch_mean_power_u16_registry[] = {
{ 6, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
{ 1, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
{ 2, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
{ 3, "float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "u32_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "u64_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u64_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "neon_float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "u32_x86_avx2", "x86_avx2", starch_mean_power_u16_u32_x86_avx2, cpu_supports_avx2 },
{ 1, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
@ -850,6 +956,20 @@ starch_mean_power_u16_aligned_regentry starch_mean_power_u16_aligned_registry[]
{ 10, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
#endif /* STARCH_MIX_ARM */
#ifdef STARCH_MIX_AARCH64
{ 0, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
{ 1, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
{ 2, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
{ 3, "float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 4, "u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 5, "u64_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_u64_armv8_neon_simd, cpu_supports_armv8_simd },
{ 6, "neon_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 7, "float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_float_armv8_neon_simd, cpu_supports_armv8_simd },
{ 8, "u32_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u32_armv8_neon_simd, cpu_supports_armv8_simd },
{ 9, "u64_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u64_armv8_neon_simd, cpu_supports_armv8_simd },
{ 10, "neon_float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
#endif /* STARCH_MIX_AARCH64 */
#ifdef STARCH_MIX_X86
{ 0, "u32_x86_avx2_aligned", "x86_avx2", starch_mean_power_u16_aligned_u32_x86_avx2, cpu_supports_avx2 },
{ 1, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },

View File

@ -14,11 +14,11 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv7a_neon_vfpv4
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/mean_power_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"
#undef STARCH_ALIGNMENT
@ -33,9 +33,9 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv7a_neon_vfpv4
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/mean_power_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"

View File

@ -0,0 +1,40 @@
/* starch generated code. Do not edit. */
#define STARCH_FLAVOR_ARMV8_A
#include "starch.h"
#undef STARCH_ALIGNMENT
#define STARCH_ALIGNMENT 1
#define STARCH_ALIGNED(_ptr) (_ptr)
#define STARCH_SYMBOL(_name) starch_ ## _name ## _ ## armv8_a
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv8_a
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"
#undef STARCH_ALIGNMENT
#undef STARCH_ALIGNED
#undef STARCH_SYMBOL
#undef STARCH_IMPL
#undef STARCH_IMPL_REQUIRES
#define STARCH_ALIGNMENT STARCH_MIX_ALIGNMENT
#define STARCH_ALIGNED(_ptr) (__builtin_assume_aligned((_ptr), STARCH_MIX_ALIGNMENT))
#define STARCH_SYMBOL(_name) starch_ ## _name ## _aligned_ ## armv8_a
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv8_a
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"

View File

@ -0,0 +1,41 @@
/* starch generated code. Do not edit. */
#define STARCH_FLAVOR_ARMV8_NEON_SIMD
#define STARCH_FEATURE_NEON
#include "starch.h"
#undef STARCH_ALIGNMENT
#define STARCH_ALIGNMENT 1
#define STARCH_ALIGNED(_ptr) (_ptr)
#define STARCH_SYMBOL(_name) starch_ ## _name ## _ ## armv8_neon_simd
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv8_neon_simd
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"
#undef STARCH_ALIGNMENT
#undef STARCH_ALIGNED
#undef STARCH_SYMBOL
#undef STARCH_IMPL
#undef STARCH_IMPL_REQUIRES
#define STARCH_ALIGNMENT STARCH_MIX_ALIGNMENT
#define STARCH_ALIGNED(_ptr) (__builtin_assume_aligned((_ptr), STARCH_MIX_ALIGNMENT))
#define STARCH_SYMBOL(_name) starch_ ## _name ## _aligned_ ## armv8_neon_simd
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv8_neon_simd
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"

View File

@ -13,9 +13,9 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## generic
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/mean_power_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"

View File

@ -13,11 +13,11 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## x86_avx2
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/mean_power_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"
#undef STARCH_ALIGNMENT
@ -32,9 +32,9 @@
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## x86_avx2
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
#include "../impl/mean_power_u16.c"
#include "../impl/magnitude_power_uc8.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_sc16.c"
#include "../impl/magnitude_sc16q11.c"
#include "../impl/magnitude_uc8.c"
#include "../impl/mean_power_u16.c"

View File

@ -21,19 +21,19 @@
STARCH_CFLAGS := -DSTARCH_MIX_ARM
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) -march=armv7-a+neon-vfpv4 -mfpu=neon-vfpv4 -ffast-math dsp/generated/flavor.armv7a_neon_vfpv4.c -o dsp/generated/flavor.armv7a_neon_vfpv4.o
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.armv7a_neon_vfpv4.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -21,16 +21,16 @@
STARCH_CFLAGS := -DSTARCH_MIX_GENERIC
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -21,19 +21,19 @@
STARCH_CFLAGS := -DSTARCH_MIX_X86
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) -mavx2 -ffast-math dsp/generated/flavor.x86_avx2.c -o dsp/generated/flavor.x86_avx2.o
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
STARCH_OBJS := dsp/generated/flavor.x86_avx2.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o

View File

@ -19,6 +19,13 @@
#define STARCH_MIX_ALIGNMENT 16
#endif /* STARCH_MIX_ARM */
/* AARCH64 */
#ifdef STARCH_MIX_AARCH64
#define STARCH_FLAVOR_ARMV8_NEON_SIMD
#define STARCH_FLAVOR_GENERIC
#define STARCH_MIX_ALIGNMENT 32
#endif /* STARCH_MIX_AARCH64 */
/* x64 */
#ifdef STARCH_MIX_X86
#define STARCH_FLAVOR_X86_AVX2
@ -191,35 +198,27 @@ void starch_mean_power_u16_aligned_set_wisdom( const char * const * received_wis
/* flavors and prototypes */
/*
 * Prototypes for the "generic" flavor, compiled in only when
 * STARCH_FLAVOR_GENERIC is defined. Every function name ends in _generic.
 *
 * Kernel families declared here and their signatures:
 *   - mean_power_u16:      (const uint16_t *, unsigned, double *, double *)
 *   - magnitude_power_uc8: (const uc8_t *, uint16_t *, unsigned, double *, double *)
 *   - magnitude_uc8:       (const uc8_t *, uint16_t *, unsigned)
 *   - magnitude_sc16:      (const sc16_t *, uint16_t *, unsigned)
 *   - magnitude_sc16q11:   (const sc16_t *, uint16_t *, unsigned)
 *
 * NOTE(review): the mean_power_u16, magnitude_uc8 and magnitude_sc16
 * prototypes appear twice in this listing with identical signatures.
 * Repeating an identical prototype is valid C, but it looks like both sides
 * of a diff shown without +/- markers; confirm against the generated header.
 */
#ifdef STARCH_FLAVOR_GENERIC
void starch_mean_power_u16_float_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_magnitude_power_uc8_twopass_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_11bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_12bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_mean_power_u16_float_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
#endif /* STARCH_FLAVOR_GENERIC */
int starch_read_wisdom (const char * path);
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
int cpu_supports_armv7_neon_vfpv4 (void);
void starch_mean_power_u16_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_magnitude_power_uc8_twopass_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_twopass_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
@ -228,14 +227,12 @@ void starch_magnitude_power_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
@ -246,36 +243,84 @@ void starch_magnitude_sc16q11_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg
void starch_magnitude_sc16q11_aligned_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_mean_power_u16_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
#endif /* STARCH_FLAVOR_ARMV7A_NEON_VFPV4 */
int starch_read_wisdom (const char * path);
/*
 * Prototypes for the armv8_neon_simd flavor, compiled in only when
 * STARCH_FLAVOR_ARMV8_NEON_SIMD is defined. Every kernel name ends in
 * _armv8_neon_simd.
 *
 * The block first declares cpu_supports_armv8_simd(), then the kernels for
 * five families: magnitude_power_uc8, magnitude_sc16, magnitude_sc16q11,
 * magnitude_uc8 and mean_power_u16. Each implementation is declared twice:
 * once under the plain family name and once under the "<family>_aligned"
 * name, with identical signatures.
 *
 * This flavor also declares NEON-specific implementations: neon_vrsqrte for
 * the four magnitude families and neon_float for mean_power_u16.
 *
 * NOTE(review): the alignment contract of the *_aligned entry points is not
 * stated in this chunk; presumably callers must pass buffers aligned to
 * STARCH_MIX_ALIGNMENT. Confirm.
 */
#ifdef STARCH_FLAVOR_ARMV8_NEON_SIMD
int cpu_supports_armv8_simd (void);
void starch_magnitude_power_uc8_twopass_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_twopass_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_sc16_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_11bit_table_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_11bit_table_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_12bit_table_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_12bit_table_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_mean_power_u16_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_neon_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_neon_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
#endif /* STARCH_FLAVOR_ARMV8_NEON_SIMD */
int starch_read_wisdom (const char * path);
#ifdef STARCH_FLAVOR_X86_AVX2
int cpu_supports_avx2 (void);
void starch_mean_power_u16_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_magnitude_power_uc8_twopass_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_twopass_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
void starch_magnitude_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
@ -284,10 +329,18 @@ void starch_magnitude_sc16q11_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16
void starch_magnitude_sc16q11_aligned_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16q11_aligned_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_sc16_aligned_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_magnitude_uc8_aligned_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
void starch_mean_power_u16_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
void starch_mean_power_u16_aligned_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
#endif /* STARCH_FLAVOR_X86_AVX2 */
int starch_read_wisdom (const char * path);

View File

@ -32,6 +32,12 @@ gen.add_flavor(name = 'armv7a_neon_vfpv4',
features = ['neon'],
test_function = 'cpu_supports_armv7_neon_vfpv4',
alignment = 16)
# Register the ARMv8-A flavor. It is built with -march=armv8-a+simd and
# -ffast-math, advertises the 'neon' feature, and names
# cpu_supports_armv8_simd as its test function.
# NOTE(review): alignment is 32 here, whereas the armv7a_neon_vfpv4 flavor
# registered just before this uses 16 -- confirm 32 is intended.
gen.add_flavor(
    name="armv8_neon_simd",
    description="ARMv8-A, NEON, SIMD",
    compile_flags=["-march=armv8-a+simd", "-ffast-math"],
    features=["neon"],
    test_function="cpu_supports_armv8_simd",
    alignment=32,
)
gen.add_flavor(name = 'x86_avx2',
description = 'x86 with AVX2',
compile_flags = ['-mavx2', '-ffast-math'],
@ -48,6 +54,11 @@ gen.add_mix(name = 'arm',
flavors = ['armv7a_neon_vfpv4', 'generic'],
wisdom_file = 'wisdom.arm')
# Register the 'aarch64' mix: the armv8_neon_simd flavor followed by the
# generic flavor, with 'wisdom.aarch64' as its wisdom file.
# NOTE(review): the wisdom files visible next to this change are named
# wisdom.aarch64.pi4b and wisdom.aarch64.tegra -- confirm that a plain
# 'wisdom.aarch64' is what the build expects.
gen.add_mix(
    name="aarch64",
    description="AARCH64",
    flavors=["armv8_neon_simd", "generic"],
    wisdom_file="wisdom.aarch64",
)
gen.add_mix(name = 'x86',
description = 'x64',
flavors = ['x86_avx2', 'generic'],

101
wisdom/wisdom.aarch64.pi4b Normal file
View File

@ -0,0 +1,101 @@
# generated by ./starch-benchmark -i 15 -o wisdom.aarch64.pi4b
magnitude_power_uc8 neon_vrsqrte_armv8_neon_simd # 242171 ns/call
magnitude_power_uc8 lookup_unroll_4_armv8_neon_simd # 309918 ns/call
magnitude_power_uc8 lookup_unroll_4_generic # 310083 ns/call
magnitude_power_uc8 twopass_armv8_neon_simd # 331999 ns/call
magnitude_power_uc8 twopass_generic # 332283 ns/call
magnitude_power_uc8 lookup_armv8_neon_simd # 354725 ns/call
magnitude_power_uc8 lookup_generic # 354993 ns/call
magnitude_power_uc8_aligned neon_vrsqrte_armv8_neon_simd # 231223 ns/call
magnitude_power_uc8_aligned neon_vrsqrte_armv8_neon_simd_aligned # 231231 ns/call
magnitude_power_uc8_aligned lookup_unroll_4_armv8_neon_simd # 317120 ns/call
magnitude_power_uc8_aligned lookup_unroll_4_armv8_neon_simd_aligned # 317202 ns/call
magnitude_power_uc8_aligned lookup_unroll_4_generic # 317261 ns/call
magnitude_power_uc8_aligned twopass_armv8_neon_simd # 326316 ns/call
magnitude_power_uc8_aligned twopass_generic # 326441 ns/call
magnitude_power_uc8_aligned twopass_armv8_neon_simd_aligned # 339548 ns/call
magnitude_power_uc8_aligned lookup_generic # 353854 ns/call
magnitude_power_uc8_aligned lookup_armv8_neon_simd_aligned # 353897 ns/call
magnitude_power_uc8_aligned lookup_armv8_neon_simd # 354025 ns/call
magnitude_sc16 neon_vrsqrte_armv8_neon_simd # 687064 ns/call
magnitude_sc16 exact_u32_armv8_neon_simd # 1337885 ns/call
magnitude_sc16 exact_float_armv8_neon_simd # 1409773 ns/call
magnitude_sc16 exact_u32_generic # 3331842 ns/call
magnitude_sc16 exact_float_generic # 3414790 ns/call
magnitude_sc16_aligned neon_vrsqrte_armv8_neon_simd # 669434 ns/call
magnitude_sc16_aligned neon_vrsqrte_armv8_neon_simd_aligned # 770926 ns/call
magnitude_sc16_aligned exact_u32_armv8_neon_simd # 1336333 ns/call
magnitude_sc16_aligned exact_float_armv8_neon_simd # 1397618 ns/call
magnitude_sc16_aligned exact_u32_armv8_neon_simd_aligned # 1808644 ns/call
magnitude_sc16_aligned exact_float_armv8_neon_simd_aligned # 1927454 ns/call
magnitude_sc16_aligned exact_u32_generic # 2750034 ns/call
magnitude_sc16_aligned exact_float_generic # 3167265 ns/call
magnitude_sc16q11 neon_vrsqrte_armv8_neon_simd # 166265 ns/call
magnitude_sc16q11 exact_float_armv8_neon_simd # 347400 ns/call
magnitude_sc16q11 exact_u32_armv8_neon_simd # 350422 ns/call
magnitude_sc16q11 exact_u32_generic # 951466 ns/call
magnitude_sc16q11 exact_float_generic # 1041727 ns/call
magnitude_sc16q11 12bit_table_generic # 2008901 ns/call
magnitude_sc16q11 12bit_table_armv8_neon_simd # 2117606 ns/call
magnitude_sc16q11 11bit_table_generic # 2315294 ns/call
magnitude_sc16q11 11bit_table_armv8_neon_simd # 2317090 ns/call
magnitude_sc16q11_aligned neon_vrsqrte_armv8_neon_simd # 155062 ns/call
magnitude_sc16q11_aligned neon_vrsqrte_armv8_neon_simd_aligned # 212453 ns/call
magnitude_sc16q11_aligned exact_u32_armv8_neon_simd_aligned # 329287 ns/call
magnitude_sc16q11_aligned exact_float_armv8_neon_simd_aligned # 345611 ns/call
magnitude_sc16q11_aligned exact_float_armv8_neon_simd # 426742 ns/call
magnitude_sc16q11_aligned exact_u32_armv8_neon_simd # 493451 ns/call
magnitude_sc16q11_aligned exact_u32_generic # 993016 ns/call
magnitude_sc16q11_aligned exact_float_generic # 1041225 ns/call
magnitude_sc16q11_aligned 12bit_table_armv8_neon_simd_aligned # 2008440 ns/call
magnitude_sc16q11_aligned 12bit_table_generic # 2010237 ns/call
magnitude_sc16q11_aligned 12bit_table_armv8_neon_simd # 2010954 ns/call
magnitude_sc16q11_aligned 11bit_table_armv8_neon_simd_aligned # 2314544 ns/call
magnitude_sc16q11_aligned 11bit_table_generic # 2317709 ns/call
magnitude_sc16q11_aligned 11bit_table_armv8_neon_simd # 2672466 ns/call
magnitude_uc8 neon_vrsqrte_armv8_neon_simd # 213353 ns/call
magnitude_uc8 lookup_generic # 285617 ns/call
magnitude_uc8 lookup_armv8_neon_simd # 285723 ns/call
magnitude_uc8 lookup_unroll_4_generic # 288439 ns/call
magnitude_uc8 lookup_unroll_4_armv8_neon_simd # 288520 ns/call
magnitude_uc8 exact_armv8_neon_simd # 533721 ns/call
magnitude_uc8 exact_generic # 1703775 ns/call
magnitude_uc8_aligned neon_vrsqrte_armv8_neon_simd # 214464 ns/call
magnitude_uc8_aligned lookup_armv8_neon_simd_aligned # 280649 ns/call
magnitude_uc8_aligned lookup_generic # 280742 ns/call
magnitude_uc8_aligned lookup_unroll_4_armv8_neon_simd # 293121 ns/call
magnitude_uc8_aligned lookup_unroll_4_generic # 293163 ns/call
magnitude_uc8_aligned neon_vrsqrte_armv8_neon_simd_aligned # 294461 ns/call
magnitude_uc8_aligned lookup_unroll_4_armv8_neon_simd_aligned # 313567 ns/call
magnitude_uc8_aligned lookup_armv8_neon_simd # 340192 ns/call
magnitude_uc8_aligned exact_armv8_neon_simd # 533623 ns/call
magnitude_uc8_aligned exact_armv8_neon_simd_aligned # 731823 ns/call
magnitude_uc8_aligned exact_generic # 1705445 ns/call
mean_power_u16 u32_armv8_neon_simd # 45663 ns/call
mean_power_u16 u32_generic # 45672 ns/call
mean_power_u16 neon_float_armv8_neon_simd # 72283 ns/call
mean_power_u16 u64_armv8_neon_simd # 89187 ns/call
mean_power_u16 u64_generic # 89199 ns/call
mean_power_u16 float_armv8_neon_simd # 94634 ns/call
mean_power_u16 float_generic # 176676 ns/call
mean_power_u16_aligned u32_armv8_neon_simd # 44865 ns/call
mean_power_u16_aligned u32_generic # 52958 ns/call
mean_power_u16_aligned u32_armv8_neon_simd_aligned # 60579 ns/call
mean_power_u16_aligned neon_float_armv8_neon_simd # 77277 ns/call
mean_power_u16_aligned u64_armv8_neon_simd # 86287 ns/call
mean_power_u16_aligned u64_generic # 86295 ns/call
mean_power_u16_aligned float_armv8_neon_simd_aligned # 87501 ns/call
mean_power_u16_aligned neon_float_armv8_neon_simd_aligned # 94315 ns/call
mean_power_u16_aligned float_armv8_neon_simd # 104800 ns/call
mean_power_u16_aligned u64_armv8_neon_simd_aligned # 119504 ns/call
mean_power_u16_aligned float_generic # 176475 ns/call

101
wisdom/wisdom.aarch64.tegra Normal file
View File

@ -0,0 +1,101 @@
# generated by ./starch-benchmark -i 15 -o wisdom.aarch64.tegra
magnitude_power_uc8 neon_vrsqrte_armv8_neon_simd # 94796 ns/call
magnitude_power_uc8 lookup_armv8_neon_simd # 192167 ns/call
magnitude_power_uc8 lookup_generic # 192384 ns/call
magnitude_power_uc8 lookup_unroll_4_generic # 201674 ns/call
magnitude_power_uc8 lookup_unroll_4_armv8_neon_simd # 202605 ns/call
magnitude_power_uc8 twopass_armv8_neon_simd # 211684 ns/call
magnitude_power_uc8 twopass_generic # 212405 ns/call
magnitude_power_uc8_aligned neon_vrsqrte_armv8_neon_simd # 94539 ns/call
magnitude_power_uc8_aligned neon_vrsqrte_armv8_neon_simd_aligned # 96537 ns/call
magnitude_power_uc8_aligned lookup_armv8_neon_simd # 194018 ns/call
magnitude_power_uc8_aligned lookup_generic # 194129 ns/call
magnitude_power_uc8_aligned lookup_armv8_neon_simd_aligned # 194586 ns/call
magnitude_power_uc8_aligned lookup_unroll_4_armv8_neon_simd # 202656 ns/call
magnitude_power_uc8_aligned lookup_unroll_4_generic # 203133 ns/call
magnitude_power_uc8_aligned lookup_unroll_4_armv8_neon_simd_aligned # 203492 ns/call
magnitude_power_uc8_aligned twopass_armv8_neon_simd # 218867 ns/call
magnitude_power_uc8_aligned twopass_generic # 219683 ns/call
magnitude_power_uc8_aligned twopass_armv8_neon_simd_aligned # 232710 ns/call
magnitude_sc16 neon_vrsqrte_armv8_neon_simd # 248412 ns/call
magnitude_sc16 exact_u32_armv8_neon_simd # 497100 ns/call
magnitude_sc16 exact_float_armv8_neon_simd # 499026 ns/call
magnitude_sc16 exact_u32_generic # 2498651 ns/call
magnitude_sc16 exact_float_generic # 2630913 ns/call
magnitude_sc16_aligned neon_vrsqrte_armv8_neon_simd_aligned # 251091 ns/call
magnitude_sc16_aligned neon_vrsqrte_armv8_neon_simd # 251917 ns/call
magnitude_sc16_aligned exact_u32_armv8_neon_simd # 495168 ns/call
magnitude_sc16_aligned exact_float_armv8_neon_simd # 496604 ns/call
magnitude_sc16_aligned exact_u32_armv8_neon_simd_aligned # 497295 ns/call
magnitude_sc16_aligned exact_float_armv8_neon_simd_aligned # 497677 ns/call
magnitude_sc16_aligned exact_u32_generic # 2502639 ns/call
magnitude_sc16_aligned exact_float_generic # 2508165 ns/call
magnitude_sc16q11 neon_vrsqrte_armv8_neon_simd # 61889 ns/call
magnitude_sc16q11 exact_u32_armv8_neon_simd # 121180 ns/call
magnitude_sc16q11 exact_float_armv8_neon_simd # 122913 ns/call
magnitude_sc16q11 12bit_table_generic # 600092 ns/call
magnitude_sc16q11 12bit_table_armv8_neon_simd # 602741 ns/call
magnitude_sc16q11 11bit_table_armv8_neon_simd # 713333 ns/call
magnitude_sc16q11 11bit_table_generic # 747792 ns/call
magnitude_sc16q11 exact_float_generic # 819436 ns/call
magnitude_sc16q11 exact_u32_generic # 830130 ns/call
magnitude_sc16q11_aligned neon_vrsqrte_armv8_neon_simd_aligned # 62013 ns/call
magnitude_sc16q11_aligned neon_vrsqrte_armv8_neon_simd # 62417 ns/call
magnitude_sc16q11_aligned exact_u32_armv8_neon_simd # 121349 ns/call
magnitude_sc16q11_aligned exact_u32_armv8_neon_simd_aligned # 121531 ns/call
magnitude_sc16q11_aligned exact_float_armv8_neon_simd # 122073 ns/call
magnitude_sc16q11_aligned exact_float_armv8_neon_simd_aligned # 122670 ns/call
magnitude_sc16q11_aligned 12bit_table_armv8_neon_simd # 589282 ns/call
magnitude_sc16q11_aligned 12bit_table_generic # 590574 ns/call
magnitude_sc16q11_aligned 12bit_table_armv8_neon_simd_aligned # 591626 ns/call
magnitude_sc16q11_aligned 11bit_table_armv8_neon_simd # 708434 ns/call
magnitude_sc16q11_aligned 11bit_table_armv8_neon_simd_aligned # 712503 ns/call
magnitude_sc16q11_aligned 11bit_table_generic # 739828 ns/call
magnitude_sc16q11_aligned exact_float_generic # 822781 ns/call
magnitude_sc16q11_aligned exact_u32_generic # 831139 ns/call
magnitude_uc8 neon_vrsqrte_armv8_neon_simd # 75259 ns/call
magnitude_uc8 lookup_armv8_neon_simd # 185908 ns/call
magnitude_uc8 lookup_generic # 187426 ns/call
magnitude_uc8 lookup_unroll_4_armv8_neon_simd # 203217 ns/call
magnitude_uc8 lookup_unroll_4_generic # 205435 ns/call
magnitude_uc8 exact_armv8_neon_simd # 211685 ns/call
magnitude_uc8 exact_generic # 1143963 ns/call
magnitude_uc8_aligned neon_vrsqrte_armv8_neon_simd # 74829 ns/call
magnitude_uc8_aligned neon_vrsqrte_armv8_neon_simd_aligned # 75205 ns/call
magnitude_uc8_aligned lookup_armv8_neon_simd_aligned # 176228 ns/call
magnitude_uc8_aligned lookup_armv8_neon_simd # 176801 ns/call
magnitude_uc8_aligned lookup_generic # 177103 ns/call
magnitude_uc8_aligned lookup_unroll_4_armv8_neon_simd_aligned # 196536 ns/call
magnitude_uc8_aligned lookup_unroll_4_armv8_neon_simd # 197343 ns/call
magnitude_uc8_aligned lookup_unroll_4_generic # 198190 ns/call
magnitude_uc8_aligned exact_armv8_neon_simd # 210215 ns/call
magnitude_uc8_aligned exact_armv8_neon_simd_aligned # 211766 ns/call
magnitude_uc8_aligned exact_generic # 1129546 ns/call
mean_power_u16 neon_float_armv8_neon_simd # 39477 ns/call
mean_power_u16 u32_generic # 42560 ns/call
mean_power_u16 u32_armv8_neon_simd # 44544 ns/call
mean_power_u16 float_armv8_neon_simd # 52529 ns/call
mean_power_u16 u64_generic # 85141 ns/call
mean_power_u16 u64_armv8_neon_simd # 85219 ns/call
mean_power_u16 float_generic # 155312 ns/call
mean_power_u16_aligned neon_float_armv8_neon_simd # 39385 ns/call
mean_power_u16_aligned neon_float_armv8_neon_simd_aligned # 39524 ns/call
mean_power_u16_aligned u32_generic # 42604 ns/call
mean_power_u16_aligned u32_armv8_neon_simd_aligned # 42712 ns/call
mean_power_u16_aligned u32_armv8_neon_simd # 44513 ns/call
mean_power_u16_aligned float_armv8_neon_simd # 52471 ns/call
mean_power_u16_aligned float_armv8_neon_simd_aligned # 52593 ns/call
mean_power_u16_aligned u64_armv8_neon_simd # 85041 ns/call
mean_power_u16_aligned u64_generic # 85056 ns/call
mean_power_u16_aligned u64_armv8_neon_simd_aligned # 85239 ns/call
mean_power_u16_aligned float_generic # 153697 ns/call