Merge pull request #108 from gtjoseph/dev-fa-starch-aarch64
Starch config: Add aarch64
This commit is contained in:
commit
5600d3fc71
26
Makefile
26
Makefile
|
|
@ -141,19 +141,23 @@ ifneq ($(CPUFEATURES),yes)
|
|||
# need to be able to detect CPU features at runtime to enable any non-standard compiler flags
|
||||
STARCH_MIX := generic
|
||||
CPPFLAGS += -DSTARCH_MIX_GENERIC
|
||||
else ifeq ($(ARCH),x86_64)
|
||||
# AVX, AVX2
|
||||
STARCH_MIX := x86
|
||||
CPPFLAGS += -DSTARCH_MIX_X86
|
||||
else ifneq (,$(findstring arm,$(ARCH)))
|
||||
# ARMv7 NEON
|
||||
STARCH_MIX := arm
|
||||
CPPFLAGS += -DSTARCH_MIX_ARM
|
||||
else
|
||||
STARCH_MIX := generic
|
||||
CPPFLAGS += -DSTARCH_MIX_GENERIC
|
||||
ifeq ($(ARCH),x86_64)
|
||||
# AVX, AVX2
|
||||
STARCH_MIX := x86
|
||||
CPPFLAGS += -DSTARCH_MIX_X86
|
||||
else ifeq ($(findstring arm,$(ARCH)),arm)
|
||||
# ARMv7 NEON
|
||||
STARCH_MIX := arm
|
||||
CPPFLAGS += -DSTARCH_MIX_ARM
|
||||
else ifeq ($(findstring aarch,$(ARCH)),aarch)
|
||||
STARCH_MIX := aarch64
|
||||
CPPFLAGS += -DSTARCH_MIX_AARCH64
|
||||
else
|
||||
STARCH_MIX := generic
|
||||
CPPFLAGS += -DSTARCH_MIX_GENERIC
|
||||
endif
|
||||
endif
|
||||
|
||||
all: showconfig dump1090 view1090 starch-benchmark
|
||||
|
||||
STARCH_COMPILE := $(CC) $(CPPFLAGS) $(CFLAGS) -c
|
||||
|
|
|
|||
|
|
@ -25,5 +25,9 @@ ifneq (,$(findstring arm,$(CPUFEATURES_ARCH)))
|
|||
CPUFEATURES_OBJS += cpu_features/src/cpuinfo_arm.o
|
||||
endif
|
||||
|
||||
ifneq (,$(findstring aarch64,$(CPUFEATURES_ARCH)))
|
||||
CPUFEATURES_OBJS += cpu_features/src/cpuinfo_aarch64.o
|
||||
endif
|
||||
|
||||
$(CPUFEATURES_OBJS): override CFLAGS := $(CPUFEATURES_CFLAGS)
|
||||
$(CPUFEATURES_OBJS): override CPPFLAGS := -Icpu_features/include
|
||||
|
|
|
|||
31
cpu.c
31
cpu.c
|
|
@ -76,3 +76,34 @@ int cpu_supports_armv7_neon_vfpv4(void)
|
|||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
//
|
||||
// AARCH64
|
||||
//
|
||||
|
||||
#ifdef CPU_FEATURES_ARCH_AARCH64
|
||||
#include "cpuinfo_aarch64.h"
|
||||
|
||||
/*
 * Return a pointer to the cached AArch64 CPU description.
 *
 * GetAarch64Info() is invoked only on the first call; its result is kept
 * in a function-local static and the same pointer is handed back on every
 * later call.  The first-call initialisation is not synchronised.
 */
static Aarch64Info *aarch64_info(void)
{
    static bool valid = false;
    static Aarch64Info cache;

    if (!valid) {
        /* first call: query the CPU once and remember the answer */
        cache = GetAarch64Info();
        valid = true;
    }

    return &cache;
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Report whether the running CPU advertises the "asimd" feature.
 *
 * Returns 1 when the feature flag is set and 0 otherwise.  Builds that do
 * not define CPU_FEATURES_ARCH_AARCH64 always return 0.
 */
int cpu_supports_armv8_simd(void)
{
#ifdef CPU_FEATURES_ARCH_AARCH64
    /*
     * Normalise to 0/1 instead of returning the member directly.
     * NOTE(review): presumably features.asimd is a one-bit signed bitfield,
     * which would read back as -1 when set -- confirm against
     * cpuinfo_aarch64.h.
     */
    return aarch64_info()->features.asimd != 0;
#else
    return 0;
#endif
}
|
||||
|
|
|
|||
4
cpu.h
4
cpu.h
|
|
@ -8,4 +8,8 @@ int cpu_supports_avx2(void);
|
|||
// ARM
|
||||
int cpu_supports_armv7_neon_vfpv4(void);
|
||||
|
||||
// AARCH64
|
||||
// Nonzero when the CPU reports the "asimd" feature; always 0 in builds
// that do not define CPU_FEATURES_ARCH_AARCH64.
int cpu_supports_armv8_simd(void);
|
||||
// NOTE(review): no definition of this function is visible in the cpu.c
// part of this change -- confirm one exists before calling it.
int cpu_supports_armv8_simd_sve(void);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1246,11 +1246,11 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
|
|||
#define STARCH_BENCHMARK_ALLOC(_count, _type) ((_type *) starch_benchmark_aligned_alloc(1, alignof(_type), (_count) * sizeof(_type)))
|
||||
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)
|
||||
|
||||
#include "../benchmark/magnitude_sc16_benchmark.c"
|
||||
#include "../benchmark/magnitude_uc8_benchmark.c"
|
||||
#include "../benchmark/magnitude_power_uc8_benchmark.c"
|
||||
#include "../benchmark/mean_power_u16_benchmark.c"
|
||||
#include "../benchmark/magnitude_sc16_benchmark.c"
|
||||
#include "../benchmark/magnitude_sc16q11_benchmark.c"
|
||||
#include "../benchmark/magnitude_uc8_benchmark.c"
|
||||
#include "../benchmark/mean_power_u16_benchmark.c"
|
||||
|
||||
#undef STARCH_ALIGNMENT
|
||||
#undef STARCH_ALIGNED
|
||||
|
|
@ -1274,11 +1274,11 @@ static void starch_benchmark_run_mean_power_u16_aligned( const uint16_t * arg0,
|
|||
#define STARCH_BENCHMARK_ALLOC(_count, _type) ((_type *) starch_benchmark_aligned_alloc(STARCH_MIX_ALIGNMENT, alignof(_type), (_count) * sizeof(_type)))
|
||||
#define STARCH_BENCHMARK_FREE(_ptr) starch_benchmark_aligned_free(_ptr)
|
||||
|
||||
#include "../benchmark/magnitude_sc16_benchmark.c"
|
||||
#include "../benchmark/magnitude_uc8_benchmark.c"
|
||||
#include "../benchmark/magnitude_power_uc8_benchmark.c"
|
||||
#include "../benchmark/mean_power_u16_benchmark.c"
|
||||
#include "../benchmark/magnitude_sc16_benchmark.c"
|
||||
#include "../benchmark/magnitude_sc16q11_benchmark.c"
|
||||
#include "../benchmark/magnitude_uc8_benchmark.c"
|
||||
#include "../benchmark/mean_power_u16_benchmark.c"
|
||||
|
||||
static void starch_benchmark_all_magnitude_uc8(void)
|
||||
{
|
||||
|
|
@ -1375,6 +1375,9 @@ static void starch_benchmark_usage(const char *argv0)
|
|||
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
|
||||
"armv7a_neon_vfpv4 "
|
||||
#endif
|
||||
#ifdef STARCH_FLAVOR_ARMV8_NEON_SIMD
|
||||
"armv8_neon_simd "
|
||||
#endif
|
||||
#ifdef STARCH_FLAVOR_X86_AVX2
|
||||
"x86_avx2 "
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -89,6 +89,16 @@ starch_magnitude_uc8_regentry starch_magnitude_uc8_registry[] = {
|
|||
{ 6, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
{ 3, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "exact_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_exact_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
|
|
@ -174,6 +184,20 @@ starch_magnitude_uc8_aligned_regentry starch_magnitude_uc8_aligned_registry[] =
|
|||
{ 10, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
{ 1, "lookup_generic", "generic", starch_magnitude_uc8_lookup_generic, NULL },
|
||||
{ 2, "exact_generic", "generic", starch_magnitude_uc8_exact_generic, NULL },
|
||||
{ 3, "lookup_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "lookup_unroll_4_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "exact_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_exact_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_uc8_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 7, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 8, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 9, "exact_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_exact_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 10, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "lookup_unroll_4_x86_avx2", "x86_avx2", starch_magnitude_uc8_lookup_unroll_4_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "lookup_unroll_4_generic", "generic", starch_magnitude_uc8_lookup_unroll_4_generic, NULL },
|
||||
|
|
@ -258,6 +282,16 @@ starch_magnitude_power_uc8_regentry starch_magnitude_power_uc8_registry[] = {
|
|||
{ 6, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "twopass_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
{ 5, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
{ 6, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "twopass_x86_avx2", "x86_avx2", starch_magnitude_power_uc8_twopass_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
|
|
@ -343,6 +377,20 @@ starch_magnitude_power_uc8_aligned_regentry starch_magnitude_power_uc8_aligned_r
|
|||
{ 10, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "twopass_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "lookup_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "lookup_unroll_4_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "twopass_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_twopass_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "lookup_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "lookup_unroll_4_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 7, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 8, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
{ 9, "lookup_generic", "generic", starch_magnitude_power_uc8_lookup_generic, NULL },
|
||||
{ 10, "lookup_unroll_4_generic", "generic", starch_magnitude_power_uc8_lookup_unroll_4_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "twopass_x86_avx2_aligned", "x86_avx2", starch_magnitude_power_uc8_aligned_twopass_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "twopass_generic", "generic", starch_magnitude_power_uc8_twopass_generic, NULL },
|
||||
|
|
@ -424,6 +472,14 @@ starch_magnitude_sc16_regentry starch_magnitude_sc16_registry[] = {
|
|||
{ 4, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
{ 4, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "exact_float_x86_avx2", "x86_avx2", starch_magnitude_sc16_exact_float_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
|
|
@ -503,6 +559,17 @@ starch_magnitude_sc16_aligned_regentry starch_magnitude_sc16_aligned_registry[]
|
|||
{ 7, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "exact_u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "exact_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "exact_u32_generic", "generic", starch_magnitude_sc16_exact_u32_generic, NULL },
|
||||
{ 7, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "exact_float_x86_avx2_aligned", "x86_avx2", starch_magnitude_sc16_aligned_exact_float_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16_exact_float_generic, NULL },
|
||||
|
|
@ -587,6 +654,18 @@ starch_magnitude_sc16q11_regentry starch_magnitude_sc16q11_registry[] = {
|
|||
{ 8, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "11bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "12bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
|
||||
{ 6, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
{ 7, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
|
||||
{ 8, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "exact_float_x86_avx2", "x86_avx2", starch_magnitude_sc16q11_exact_float_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
|
|
@ -678,6 +757,23 @@ starch_magnitude_sc16q11_aligned_regentry starch_magnitude_sc16q11_aligned_regis
|
|||
{ 13, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "exact_u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 1, "exact_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 2, "11bit_table_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 3, "12bit_table_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "neon_vrsqrte_armv8_neon_simd_aligned", "armv8_neon_simd", starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "exact_u32_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "exact_float_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_exact_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 7, "11bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_11bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 8, "12bit_table_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_12bit_table_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 9, "neon_vrsqrte_armv8_neon_simd", "armv8_neon_simd", starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 10, "exact_u32_generic", "generic", starch_magnitude_sc16q11_exact_u32_generic, NULL },
|
||||
{ 11, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
{ 12, "11bit_table_generic", "generic", starch_magnitude_sc16q11_11bit_table_generic, NULL },
|
||||
{ 13, "12bit_table_generic", "generic", starch_magnitude_sc16q11_12bit_table_generic, NULL },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "exact_float_x86_avx2_aligned", "x86_avx2", starch_magnitude_sc16q11_aligned_exact_float_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "exact_float_generic", "generic", starch_magnitude_sc16q11_exact_float_generic, NULL },
|
||||
|
|
@ -765,6 +861,16 @@ starch_mean_power_u16_regentry starch_mean_power_u16_registry[] = {
|
|||
{ 6, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
{ 1, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
|
||||
{ 2, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
|
||||
{ 3, "float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "u32_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "u64_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u64_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "neon_float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "u32_x86_avx2", "x86_avx2", starch_mean_power_u16_u32_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
|
|
@ -850,6 +956,20 @@ starch_mean_power_u16_aligned_regentry starch_mean_power_u16_aligned_registry[]
|
|||
{ 10, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
{ 0, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
{ 1, "u64_generic", "generic", starch_mean_power_u16_u64_generic, NULL },
|
||||
{ 2, "float_generic", "generic", starch_mean_power_u16_float_generic, NULL },
|
||||
{ 3, "float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 4, "u32_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 5, "u64_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_u64_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 6, "neon_float_armv8_neon_simd_aligned", "armv8_neon_simd", starch_mean_power_u16_aligned_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 7, "float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 8, "u32_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u32_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 9, "u64_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_u64_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
{ 10, "neon_float_armv8_neon_simd", "armv8_neon_simd", starch_mean_power_u16_neon_float_armv8_neon_simd, cpu_supports_armv8_simd },
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
#ifdef STARCH_MIX_X86
|
||||
{ 0, "u32_x86_avx2_aligned", "x86_avx2", starch_mean_power_u16_aligned_u32_x86_avx2, cpu_supports_avx2 },
|
||||
{ 1, "u32_generic", "generic", starch_mean_power_u16_u32_generic, NULL },
|
||||
|
|
|
|||
|
|
@ -14,11 +14,11 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv7a_neon_vfpv4
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/mean_power_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
||||
#undef STARCH_ALIGNMENT
|
||||
|
|
@ -33,9 +33,9 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv7a_neon_vfpv4
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/mean_power_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
/* starch generated code. Do not edit. */
|
||||
|
||||
#define STARCH_FLAVOR_ARMV8_A
|
||||
|
||||
#include "starch.h"
|
||||
|
||||
#undef STARCH_ALIGNMENT
|
||||
|
||||
#define STARCH_ALIGNMENT 1
|
||||
#define STARCH_ALIGNED(_ptr) (_ptr)
|
||||
#define STARCH_SYMBOL(_name) starch_ ## _name ## _ ## armv8_a
|
||||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv8_a
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
||||
#undef STARCH_ALIGNMENT
|
||||
#undef STARCH_ALIGNED
|
||||
#undef STARCH_SYMBOL
|
||||
#undef STARCH_IMPL
|
||||
#undef STARCH_IMPL_REQUIRES
|
||||
|
||||
#define STARCH_ALIGNMENT STARCH_MIX_ALIGNMENT
|
||||
#define STARCH_ALIGNED(_ptr) (__builtin_assume_aligned((_ptr), STARCH_MIX_ALIGNMENT))
|
||||
#define STARCH_SYMBOL(_name) starch_ ## _name ## _aligned_ ## armv8_a
|
||||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv8_a
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
/* starch generated code. Do not edit. */
|
||||
|
||||
#define STARCH_FLAVOR_ARMV8_NEON_SIMD
|
||||
#define STARCH_FEATURE_NEON
|
||||
|
||||
#include "starch.h"
|
||||
|
||||
#undef STARCH_ALIGNMENT
|
||||
|
||||
#define STARCH_ALIGNMENT 1
|
||||
#define STARCH_ALIGNED(_ptr) (_ptr)
|
||||
#define STARCH_SYMBOL(_name) starch_ ## _name ## _ ## armv8_neon_simd
|
||||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## armv8_neon_simd
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
||||
#undef STARCH_ALIGNMENT
|
||||
#undef STARCH_ALIGNED
|
||||
#undef STARCH_SYMBOL
|
||||
#undef STARCH_IMPL
|
||||
#undef STARCH_IMPL_REQUIRES
|
||||
|
||||
#define STARCH_ALIGNMENT STARCH_MIX_ALIGNMENT
|
||||
#define STARCH_ALIGNED(_ptr) (__builtin_assume_aligned((_ptr), STARCH_MIX_ALIGNMENT))
|
||||
#define STARCH_SYMBOL(_name) starch_ ## _name ## _aligned_ ## armv8_neon_simd
|
||||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## armv8_neon_simd
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
|
@ -13,9 +13,9 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## generic
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/mean_power_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
|
|
|||
|
|
@ -13,11 +13,11 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _ ## _impl ## _ ## x86_avx2
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/mean_power_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
||||
#undef STARCH_ALIGNMENT
|
||||
|
|
@ -32,9 +32,9 @@
|
|||
#define STARCH_IMPL(_function,_impl) starch_ ## _function ## _aligned_ ## _impl ## _ ## x86_avx2
|
||||
#define STARCH_IMPL_REQUIRES(_function,_impl,_feature) STARCH_IMPL(_function,_impl)
|
||||
|
||||
#include "../impl/mean_power_u16.c"
|
||||
#include "../impl/magnitude_power_uc8.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_sc16.c"
|
||||
#include "../impl/magnitude_sc16q11.c"
|
||||
#include "../impl/magnitude_uc8.c"
|
||||
#include "../impl/mean_power_u16.c"
|
||||
|
||||
|
|
|
|||
|
|
@ -21,19 +21,19 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_ARM
|
||||
|
||||
|
||||
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.armv7a_neon_vfpv4.o: dsp/generated/flavor.armv7a_neon_vfpv4.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) -march=armv7-a+neon-vfpv4 -mfpu=neon-vfpv4 -ffast-math dsp/generated/flavor.armv7a_neon_vfpv4.c -o dsp/generated/flavor.armv7a_neon_vfpv4.o
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.armv7a_neon_vfpv4.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -21,16 +21,16 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_GENERIC
|
||||
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -21,19 +21,19 @@
|
|||
STARCH_CFLAGS := -DSTARCH_MIX_X86
|
||||
|
||||
|
||||
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.x86_avx2.o: dsp/generated/flavor.x86_avx2.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) -mavx2 -ffast-math dsp/generated/flavor.x86_avx2.c -o dsp/generated/flavor.x86_avx2.o
|
||||
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/flavor.generic.o: dsp/generated/flavor.generic.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/flavor.generic.c -o dsp/generated/flavor.generic.o
|
||||
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/mean_power_u16.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_uc8.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_sc16.c
|
||||
dsp/generated/dispatcher.o: dsp/generated/dispatcher.c dsp/impl/magnitude_power_uc8.c dsp/impl/magnitude_sc16.c dsp/impl/magnitude_sc16q11.c dsp/impl/magnitude_uc8.c dsp/impl/mean_power_u16.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/dispatcher.c -o dsp/generated/dispatcher.o
|
||||
|
||||
STARCH_OBJS := dsp/generated/flavor.x86_avx2.o dsp/generated/flavor.generic.o dsp/generated/dispatcher.o
|
||||
|
||||
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c
|
||||
dsp/generated/benchmark.o: dsp/generated/benchmark.c dsp/benchmark/magnitude_power_uc8_benchmark.c dsp/benchmark/magnitude_sc16_benchmark.c dsp/benchmark/magnitude_sc16q11_benchmark.c dsp/benchmark/magnitude_uc8_benchmark.c dsp/benchmark/mean_power_u16_benchmark.c
|
||||
$(STARCH_COMPILE) $(STARCH_CFLAGS) dsp/generated/benchmark.c -o dsp/generated/benchmark.o
|
||||
|
||||
STARCH_BENCHMARK_OBJ := dsp/generated/benchmark.o
|
||||
|
|
|
|||
|
|
@ -19,6 +19,13 @@
|
|||
#define STARCH_MIX_ALIGNMENT 16
|
||||
#endif /* STARCH_MIX_ARM */
|
||||
|
||||
/* AARCH64 */
|
||||
#ifdef STARCH_MIX_AARCH64
|
||||
#define STARCH_FLAVOR_ARMV8_NEON_SIMD
|
||||
#define STARCH_FLAVOR_GENERIC
|
||||
#define STARCH_MIX_ALIGNMENT 32
|
||||
#endif /* STARCH_MIX_AARCH64 */
|
||||
|
||||
/* x64 */
|
||||
#ifdef STARCH_MIX_X86
|
||||
#define STARCH_FLAVOR_X86_AVX2
|
||||
|
|
@ -191,35 +198,27 @@ void starch_mean_power_u16_aligned_set_wisdom( const char * const * received_wis
|
|||
/* flavors and prototypes */
|
||||
|
||||
#ifdef STARCH_FLAVOR_GENERIC
|
||||
void starch_mean_power_u16_float_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_power_uc8_twopass_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_11bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_12bit_table_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_generic ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_generic ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_mean_power_u16_float_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_generic ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
#endif /* STARCH_FLAVOR_GENERIC */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
||||
#ifdef STARCH_FLAVOR_ARMV7A_NEON_VFPV4
|
||||
int cpu_supports_armv7_neon_vfpv4 (void);
|
||||
void starch_mean_power_u16_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_power_uc8_twopass_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_twopass_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
|
|
@ -228,14 +227,12 @@ void starch_magnitude_power_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t
|
|||
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
|
@ -246,36 +243,84 @@ void starch_magnitude_sc16q11_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg
|
|||
void starch_magnitude_sc16q11_aligned_12bit_table_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv7a_neon_vfpv4 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_mean_power_u16_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_neon_float_armv7a_neon_vfpv4 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
#endif /* STARCH_FLAVOR_ARMV7A_NEON_VFPV4 */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
||||
#ifdef STARCH_FLAVOR_ARMV8_NEON_SIMD
|
||||
int cpu_supports_armv8_simd (void);
|
||||
void starch_magnitude_power_uc8_twopass_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_twopass_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_sc16_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_exact_u32_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_exact_float_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_11bit_table_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_11bit_table_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_12bit_table_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_12bit_table_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_neon_vrsqrte_armv8_neon_simd ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_neon_vrsqrte_armv8_neon_simd ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_mean_power_u16_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_neon_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_neon_float_armv8_neon_simd ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
#endif /* STARCH_FLAVOR_ARMV8_NEON_SIMD */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
||||
#ifdef STARCH_FLAVOR_X86_AVX2
|
||||
int cpu_supports_avx2 (void);
|
||||
void starch_mean_power_u16_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_magnitude_power_uc8_twopass_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_twopass_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_power_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2, double * arg3, double * arg4 );
|
||||
void starch_magnitude_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
|
|
@ -284,10 +329,18 @@ void starch_magnitude_sc16q11_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16
|
|||
void starch_magnitude_sc16q11_aligned_11bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16q11_aligned_12bit_table_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_u32_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_sc16_aligned_exact_float_x86_avx2 ( const sc16_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_lookup_unroll_4_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_magnitude_uc8_aligned_exact_x86_avx2 ( const uc8_t * arg0, uint16_t * arg1, unsigned arg2 );
|
||||
void starch_mean_power_u16_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_float_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u32_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
void starch_mean_power_u16_aligned_u64_x86_avx2 ( const uint16_t * arg0, unsigned arg1, double * arg2, double * arg3 );
|
||||
#endif /* STARCH_FLAVOR_X86_AVX2 */
|
||||
|
||||
int starch_read_wisdom (const char * path);
|
||||
|
|
|
|||
|
|
@ -32,6 +32,12 @@ gen.add_flavor(name = 'armv7a_neon_vfpv4',
|
|||
features = ['neon'],
|
||||
test_function = 'cpu_supports_armv7_neon_vfpv4',
|
||||
alignment = 16)
|
||||
gen.add_flavor(name = 'armv8_neon_simd',
|
||||
description = 'ARMv8-A, NEON, SIMD',
|
||||
compile_flags = ['-march=armv8-a+simd', '-ffast-math'],
|
||||
features = ['neon'],
|
||||
test_function = 'cpu_supports_armv8_simd',
|
||||
alignment = 32)
|
||||
gen.add_flavor(name = 'x86_avx2',
|
||||
description = 'x86 with AVX2',
|
||||
compile_flags = ['-mavx2', '-ffast-math'],
|
||||
|
|
@ -48,6 +54,11 @@ gen.add_mix(name = 'arm',
|
|||
flavors = ['armv7a_neon_vfpv4', 'generic'],
|
||||
wisdom_file = 'wisdom.arm')
|
||||
|
||||
gen.add_mix(name = 'aarch64',
|
||||
description = 'AARCH64',
|
||||
flavors = ['armv8_neon_simd', 'generic'],
|
||||
wisdom_file = 'wisdom.aarch64')
|
||||
|
||||
gen.add_mix(name = 'x86',
|
||||
description = 'x64',
|
||||
flavors = ['x86_avx2', 'generic'],
|
||||
|
|
|
|||
|
|
@ -0,0 +1,101 @@
|
|||
# generated by ./starch-benchmark -i 15 -o wisdom.aarch64.pi4b
|
||||
|
||||
magnitude_power_uc8 neon_vrsqrte_armv8_neon_simd # 242171 ns/call
|
||||
magnitude_power_uc8 lookup_unroll_4_armv8_neon_simd # 309918 ns/call
|
||||
magnitude_power_uc8 lookup_unroll_4_generic # 310083 ns/call
|
||||
magnitude_power_uc8 twopass_armv8_neon_simd # 331999 ns/call
|
||||
magnitude_power_uc8 twopass_generic # 332283 ns/call
|
||||
magnitude_power_uc8 lookup_armv8_neon_simd # 354725 ns/call
|
||||
magnitude_power_uc8 lookup_generic # 354993 ns/call
|
||||
|
||||
magnitude_power_uc8_aligned neon_vrsqrte_armv8_neon_simd # 231223 ns/call
|
||||
magnitude_power_uc8_aligned neon_vrsqrte_armv8_neon_simd_aligned # 231231 ns/call
|
||||
magnitude_power_uc8_aligned lookup_unroll_4_armv8_neon_simd # 317120 ns/call
|
||||
magnitude_power_uc8_aligned lookup_unroll_4_armv8_neon_simd_aligned # 317202 ns/call
|
||||
magnitude_power_uc8_aligned lookup_unroll_4_generic # 317261 ns/call
|
||||
magnitude_power_uc8_aligned twopass_armv8_neon_simd # 326316 ns/call
|
||||
magnitude_power_uc8_aligned twopass_generic # 326441 ns/call
|
||||
magnitude_power_uc8_aligned twopass_armv8_neon_simd_aligned # 339548 ns/call
|
||||
magnitude_power_uc8_aligned lookup_generic # 353854 ns/call
|
||||
magnitude_power_uc8_aligned lookup_armv8_neon_simd_aligned # 353897 ns/call
|
||||
magnitude_power_uc8_aligned lookup_armv8_neon_simd # 354025 ns/call
|
||||
|
||||
magnitude_sc16 neon_vrsqrte_armv8_neon_simd # 687064 ns/call
|
||||
magnitude_sc16 exact_u32_armv8_neon_simd # 1337885 ns/call
|
||||
magnitude_sc16 exact_float_armv8_neon_simd # 1409773 ns/call
|
||||
magnitude_sc16 exact_u32_generic # 3331842 ns/call
|
||||
magnitude_sc16 exact_float_generic # 3414790 ns/call
|
||||
|
||||
magnitude_sc16_aligned neon_vrsqrte_armv8_neon_simd # 669434 ns/call
|
||||
magnitude_sc16_aligned neon_vrsqrte_armv8_neon_simd_aligned # 770926 ns/call
|
||||
magnitude_sc16_aligned exact_u32_armv8_neon_simd # 1336333 ns/call
|
||||
magnitude_sc16_aligned exact_float_armv8_neon_simd # 1397618 ns/call
|
||||
magnitude_sc16_aligned exact_u32_armv8_neon_simd_aligned # 1808644 ns/call
|
||||
magnitude_sc16_aligned exact_float_armv8_neon_simd_aligned # 1927454 ns/call
|
||||
magnitude_sc16_aligned exact_u32_generic # 2750034 ns/call
|
||||
magnitude_sc16_aligned exact_float_generic # 3167265 ns/call
|
||||
|
||||
magnitude_sc16q11 neon_vrsqrte_armv8_neon_simd # 166265 ns/call
|
||||
magnitude_sc16q11 exact_float_armv8_neon_simd # 347400 ns/call
|
||||
magnitude_sc16q11 exact_u32_armv8_neon_simd # 350422 ns/call
|
||||
magnitude_sc16q11 exact_u32_generic # 951466 ns/call
|
||||
magnitude_sc16q11 exact_float_generic # 1041727 ns/call
|
||||
magnitude_sc16q11 12bit_table_generic # 2008901 ns/call
|
||||
magnitude_sc16q11 12bit_table_armv8_neon_simd # 2117606 ns/call
|
||||
magnitude_sc16q11 11bit_table_generic # 2315294 ns/call
|
||||
magnitude_sc16q11 11bit_table_armv8_neon_simd # 2317090 ns/call
|
||||
|
||||
magnitude_sc16q11_aligned neon_vrsqrte_armv8_neon_simd # 155062 ns/call
|
||||
magnitude_sc16q11_aligned neon_vrsqrte_armv8_neon_simd_aligned # 212453 ns/call
|
||||
magnitude_sc16q11_aligned exact_u32_armv8_neon_simd_aligned # 329287 ns/call
|
||||
magnitude_sc16q11_aligned exact_float_armv8_neon_simd_aligned # 345611 ns/call
|
||||
magnitude_sc16q11_aligned exact_float_armv8_neon_simd # 426742 ns/call
|
||||
magnitude_sc16q11_aligned exact_u32_armv8_neon_simd # 493451 ns/call
|
||||
magnitude_sc16q11_aligned exact_u32_generic # 993016 ns/call
|
||||
magnitude_sc16q11_aligned exact_float_generic # 1041225 ns/call
|
||||
magnitude_sc16q11_aligned 12bit_table_armv8_neon_simd_aligned # 2008440 ns/call
|
||||
magnitude_sc16q11_aligned 12bit_table_generic # 2010237 ns/call
|
||||
magnitude_sc16q11_aligned 12bit_table_armv8_neon_simd # 2010954 ns/call
|
||||
magnitude_sc16q11_aligned 11bit_table_armv8_neon_simd_aligned # 2314544 ns/call
|
||||
magnitude_sc16q11_aligned 11bit_table_generic # 2317709 ns/call
|
||||
magnitude_sc16q11_aligned 11bit_table_armv8_neon_simd # 2672466 ns/call
|
||||
|
||||
magnitude_uc8 neon_vrsqrte_armv8_neon_simd # 213353 ns/call
|
||||
magnitude_uc8 lookup_generic # 285617 ns/call
|
||||
magnitude_uc8 lookup_armv8_neon_simd # 285723 ns/call
|
||||
magnitude_uc8 lookup_unroll_4_generic # 288439 ns/call
|
||||
magnitude_uc8 lookup_unroll_4_armv8_neon_simd # 288520 ns/call
|
||||
magnitude_uc8 exact_armv8_neon_simd # 533721 ns/call
|
||||
magnitude_uc8 exact_generic # 1703775 ns/call
|
||||
|
||||
magnitude_uc8_aligned neon_vrsqrte_armv8_neon_simd # 214464 ns/call
|
||||
magnitude_uc8_aligned lookup_armv8_neon_simd_aligned # 280649 ns/call
|
||||
magnitude_uc8_aligned lookup_generic # 280742 ns/call
|
||||
magnitude_uc8_aligned lookup_unroll_4_armv8_neon_simd # 293121 ns/call
|
||||
magnitude_uc8_aligned lookup_unroll_4_generic # 293163 ns/call
|
||||
magnitude_uc8_aligned neon_vrsqrte_armv8_neon_simd_aligned # 294461 ns/call
|
||||
magnitude_uc8_aligned lookup_unroll_4_armv8_neon_simd_aligned # 313567 ns/call
|
||||
magnitude_uc8_aligned lookup_armv8_neon_simd # 340192 ns/call
|
||||
magnitude_uc8_aligned exact_armv8_neon_simd # 533623 ns/call
|
||||
magnitude_uc8_aligned exact_armv8_neon_simd_aligned # 731823 ns/call
|
||||
magnitude_uc8_aligned exact_generic # 1705445 ns/call
|
||||
|
||||
mean_power_u16 u32_armv8_neon_simd # 45663 ns/call
|
||||
mean_power_u16 u32_generic # 45672 ns/call
|
||||
mean_power_u16 neon_float_armv8_neon_simd # 72283 ns/call
|
||||
mean_power_u16 u64_armv8_neon_simd # 89187 ns/call
|
||||
mean_power_u16 u64_generic # 89199 ns/call
|
||||
mean_power_u16 float_armv8_neon_simd # 94634 ns/call
|
||||
mean_power_u16 float_generic # 176676 ns/call
|
||||
|
||||
mean_power_u16_aligned u32_armv8_neon_simd # 44865 ns/call
|
||||
mean_power_u16_aligned u32_generic # 52958 ns/call
|
||||
mean_power_u16_aligned u32_armv8_neon_simd_aligned # 60579 ns/call
|
||||
mean_power_u16_aligned neon_float_armv8_neon_simd # 77277 ns/call
|
||||
mean_power_u16_aligned u64_armv8_neon_simd # 86287 ns/call
|
||||
mean_power_u16_aligned u64_generic # 86295 ns/call
|
||||
mean_power_u16_aligned float_armv8_neon_simd_aligned # 87501 ns/call
|
||||
mean_power_u16_aligned neon_float_armv8_neon_simd_aligned # 94315 ns/call
|
||||
mean_power_u16_aligned float_armv8_neon_simd # 104800 ns/call
|
||||
mean_power_u16_aligned u64_armv8_neon_simd_aligned # 119504 ns/call
|
||||
mean_power_u16_aligned float_generic # 176475 ns/call
|
||||
|
|
@ -0,0 +1,101 @@
|
|||
# generated by ./starch-benchmark -i 15 -o wisdom.aarch64.tegra
|
||||
|
||||
magnitude_power_uc8 neon_vrsqrte_armv8_neon_simd # 94796 ns/call
|
||||
magnitude_power_uc8 lookup_armv8_neon_simd # 192167 ns/call
|
||||
magnitude_power_uc8 lookup_generic # 192384 ns/call
|
||||
magnitude_power_uc8 lookup_unroll_4_generic # 201674 ns/call
|
||||
magnitude_power_uc8 lookup_unroll_4_armv8_neon_simd # 202605 ns/call
|
||||
magnitude_power_uc8 twopass_armv8_neon_simd # 211684 ns/call
|
||||
magnitude_power_uc8 twopass_generic # 212405 ns/call
|
||||
|
||||
magnitude_power_uc8_aligned neon_vrsqrte_armv8_neon_simd # 94539 ns/call
|
||||
magnitude_power_uc8_aligned neon_vrsqrte_armv8_neon_simd_aligned # 96537 ns/call
|
||||
magnitude_power_uc8_aligned lookup_armv8_neon_simd # 194018 ns/call
|
||||
magnitude_power_uc8_aligned lookup_generic # 194129 ns/call
|
||||
magnitude_power_uc8_aligned lookup_armv8_neon_simd_aligned # 194586 ns/call
|
||||
magnitude_power_uc8_aligned lookup_unroll_4_armv8_neon_simd # 202656 ns/call
|
||||
magnitude_power_uc8_aligned lookup_unroll_4_generic # 203133 ns/call
|
||||
magnitude_power_uc8_aligned lookup_unroll_4_armv8_neon_simd_aligned # 203492 ns/call
|
||||
magnitude_power_uc8_aligned twopass_armv8_neon_simd # 218867 ns/call
|
||||
magnitude_power_uc8_aligned twopass_generic # 219683 ns/call
|
||||
magnitude_power_uc8_aligned twopass_armv8_neon_simd_aligned # 232710 ns/call
|
||||
|
||||
magnitude_sc16 neon_vrsqrte_armv8_neon_simd # 248412 ns/call
|
||||
magnitude_sc16 exact_u32_armv8_neon_simd # 497100 ns/call
|
||||
magnitude_sc16 exact_float_armv8_neon_simd # 499026 ns/call
|
||||
magnitude_sc16 exact_u32_generic # 2498651 ns/call
|
||||
magnitude_sc16 exact_float_generic # 2630913 ns/call
|
||||
|
||||
magnitude_sc16_aligned neon_vrsqrte_armv8_neon_simd_aligned # 251091 ns/call
|
||||
magnitude_sc16_aligned neon_vrsqrte_armv8_neon_simd # 251917 ns/call
|
||||
magnitude_sc16_aligned exact_u32_armv8_neon_simd # 495168 ns/call
|
||||
magnitude_sc16_aligned exact_float_armv8_neon_simd # 496604 ns/call
|
||||
magnitude_sc16_aligned exact_u32_armv8_neon_simd_aligned # 497295 ns/call
|
||||
magnitude_sc16_aligned exact_float_armv8_neon_simd_aligned # 497677 ns/call
|
||||
magnitude_sc16_aligned exact_u32_generic # 2502639 ns/call
|
||||
magnitude_sc16_aligned exact_float_generic # 2508165 ns/call
|
||||
|
||||
magnitude_sc16q11 neon_vrsqrte_armv8_neon_simd # 61889 ns/call
|
||||
magnitude_sc16q11 exact_u32_armv8_neon_simd # 121180 ns/call
|
||||
magnitude_sc16q11 exact_float_armv8_neon_simd # 122913 ns/call
|
||||
magnitude_sc16q11 12bit_table_generic # 600092 ns/call
|
||||
magnitude_sc16q11 12bit_table_armv8_neon_simd # 602741 ns/call
|
||||
magnitude_sc16q11 11bit_table_armv8_neon_simd # 713333 ns/call
|
||||
magnitude_sc16q11 11bit_table_generic # 747792 ns/call
|
||||
magnitude_sc16q11 exact_float_generic # 819436 ns/call
|
||||
magnitude_sc16q11 exact_u32_generic # 830130 ns/call
|
||||
|
||||
magnitude_sc16q11_aligned neon_vrsqrte_armv8_neon_simd_aligned # 62013 ns/call
|
||||
magnitude_sc16q11_aligned neon_vrsqrte_armv8_neon_simd # 62417 ns/call
|
||||
magnitude_sc16q11_aligned exact_u32_armv8_neon_simd # 121349 ns/call
|
||||
magnitude_sc16q11_aligned exact_u32_armv8_neon_simd_aligned # 121531 ns/call
|
||||
magnitude_sc16q11_aligned exact_float_armv8_neon_simd # 122073 ns/call
|
||||
magnitude_sc16q11_aligned exact_float_armv8_neon_simd_aligned # 122670 ns/call
|
||||
magnitude_sc16q11_aligned 12bit_table_armv8_neon_simd # 589282 ns/call
|
||||
magnitude_sc16q11_aligned 12bit_table_generic # 590574 ns/call
|
||||
magnitude_sc16q11_aligned 12bit_table_armv8_neon_simd_aligned # 591626 ns/call
|
||||
magnitude_sc16q11_aligned 11bit_table_armv8_neon_simd # 708434 ns/call
|
||||
magnitude_sc16q11_aligned 11bit_table_armv8_neon_simd_aligned # 712503 ns/call
|
||||
magnitude_sc16q11_aligned 11bit_table_generic # 739828 ns/call
|
||||
magnitude_sc16q11_aligned exact_float_generic # 822781 ns/call
|
||||
magnitude_sc16q11_aligned exact_u32_generic # 831139 ns/call
|
||||
|
||||
magnitude_uc8 neon_vrsqrte_armv8_neon_simd # 75259 ns/call
|
||||
magnitude_uc8 lookup_armv8_neon_simd # 185908 ns/call
|
||||
magnitude_uc8 lookup_generic # 187426 ns/call
|
||||
magnitude_uc8 lookup_unroll_4_armv8_neon_simd # 203217 ns/call
|
||||
magnitude_uc8 lookup_unroll_4_generic # 205435 ns/call
|
||||
magnitude_uc8 exact_armv8_neon_simd # 211685 ns/call
|
||||
magnitude_uc8 exact_generic # 1143963 ns/call
|
||||
|
||||
magnitude_uc8_aligned neon_vrsqrte_armv8_neon_simd # 74829 ns/call
|
||||
magnitude_uc8_aligned neon_vrsqrte_armv8_neon_simd_aligned # 75205 ns/call
|
||||
magnitude_uc8_aligned lookup_armv8_neon_simd_aligned # 176228 ns/call
|
||||
magnitude_uc8_aligned lookup_armv8_neon_simd # 176801 ns/call
|
||||
magnitude_uc8_aligned lookup_generic # 177103 ns/call
|
||||
magnitude_uc8_aligned lookup_unroll_4_armv8_neon_simd_aligned # 196536 ns/call
|
||||
magnitude_uc8_aligned lookup_unroll_4_armv8_neon_simd # 197343 ns/call
|
||||
magnitude_uc8_aligned lookup_unroll_4_generic # 198190 ns/call
|
||||
magnitude_uc8_aligned exact_armv8_neon_simd # 210215 ns/call
|
||||
magnitude_uc8_aligned exact_armv8_neon_simd_aligned # 211766 ns/call
|
||||
magnitude_uc8_aligned exact_generic # 1129546 ns/call
|
||||
|
||||
mean_power_u16 neon_float_armv8_neon_simd # 39477 ns/call
|
||||
mean_power_u16 u32_generic # 42560 ns/call
|
||||
mean_power_u16 u32_armv8_neon_simd # 44544 ns/call
|
||||
mean_power_u16 float_armv8_neon_simd # 52529 ns/call
|
||||
mean_power_u16 u64_generic # 85141 ns/call
|
||||
mean_power_u16 u64_armv8_neon_simd # 85219 ns/call
|
||||
mean_power_u16 float_generic # 155312 ns/call
|
||||
|
||||
mean_power_u16_aligned neon_float_armv8_neon_simd # 39385 ns/call
|
||||
mean_power_u16_aligned neon_float_armv8_neon_simd_aligned # 39524 ns/call
|
||||
mean_power_u16_aligned u32_generic # 42604 ns/call
|
||||
mean_power_u16_aligned u32_armv8_neon_simd_aligned # 42712 ns/call
|
||||
mean_power_u16_aligned u32_armv8_neon_simd # 44513 ns/call
|
||||
mean_power_u16_aligned float_armv8_neon_simd # 52471 ns/call
|
||||
mean_power_u16_aligned float_armv8_neon_simd_aligned # 52593 ns/call
|
||||
mean_power_u16_aligned u64_armv8_neon_simd # 85041 ns/call
|
||||
mean_power_u16_aligned u64_generic # 85056 ns/call
|
||||
mean_power_u16_aligned u64_armv8_neon_simd_aligned # 85239 ns/call
|
||||
mean_power_u16_aligned float_generic # 153697 ns/call
|
||||
Loading…
Reference in New Issue