dump1090-fa/cpu_features/include/cpuinfo_x86.h

232 lines
5.2 KiB
C
Raw Normal View History

Move all converters to starch-based implementations (#97) * Switch all conversion routines to use starch. main user-visible changes: * ensure you check out submodules ('git clone --recurse-submodules") * --version shows the CPU features and DSP implementations in use * --wisdom allows overriding of the built-in architecture wisdom * --dcfilter no longer supported * "starch-benchmark" binary will benchmark all options on the current machine and can produce a wisdom file to feed to the --wisdom option If you have a usecase for --dcfilter, please get in touch and let me know - it's an edge case and for now there's no starch/DSP support for it, but support can be written if needed. In almost all cases the new conversion routines are slightly or substantially faster than the old conversion routines. The only case that is slower is SC16/SC16Q11 on a Pi 0, which is around 10% slower due to changing from heavily approximated lookup tables to higher quality results (but SC16 is probably already out of reach of a Pi 0) * No need to build with SC16Q11_TABLE_BITS any more * Add oneoff/uc8_capture_stats (reads a UC8 capture; measures min/max/mean I and Q) * Switch UC8 conversion to 127.4 center, 128 range. Looking at actual UC8 captures from a RTL2832, the mean I and Q are actually at 127.4, so use that as the zero point. This means that the resulting I/Q maximum values could be as large as 127.6. Switch to 128 for simplicity. * Switch to the new UC8 zero offset in benchmarks, fix some bugs * Fix some bugs in SC16/SC16Q11 validation, tighten the max error requirements * Ditch UC8 approximation path, add a NEON VRQSQRTE path. * Tweak the SC16 exact path, add a new impl that uses a mix of u32 & floats. * SC16Q11 impl tweaks: * add a u32->float exact path * ditch the approximation path * add a NEON VRSQRTE path * add a 12-bit table path (using the full signed I/Q value, not absolute value) * Ditch SC16 approximation path, add NEON vrsqrte path * Add oneoff/dsp_error_measurement This runs sample input through the DSP functions that are allowed to be inexact and dumps the results as a TSV suitable for feeding to gnuplot to look at the actual errors. * Update make clean, make wisdom targets * Update wisdom based on benchmarking * Preserve the raw wisdom benchmark data * Update to latest starch * Update .gitignore for new wisdom files * Update starch generated code * Build starch-benchmark as part of the 'all' target * Use wisdom from /etc/dump1090-fa/wisdom.local if present * Package starch-benchmark and a helper script to generate local wisdom data * Remove submodules in preparation for importing them directly * Import cpu_features v0.6.0 from https://github.com/google/cpu_features/releases/tag/v0.6.0 * Import starch at commit a725c8491dc33a321565d451b385131e589d8490 from https://github.com/flightaware/starch
2021-01-21 11:45:00 +00:00
// Copyright 2017 Google LLC
// Copyright 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef CPU_FEATURES_INCLUDE_CPUINFO_X86_H_
#define CPU_FEATURES_INCLUDE_CPUINFO_X86_H_
#include "cpu_features_cache_info.h"
#include "cpu_features_macros.h"
CPU_FEATURES_START_CPP_NAMESPACE
// See https://en.wikipedia.org/wiki/CPUID for a list of x86 cpu features.
// The field names are based on the short name provided in the wikipedia tables.
typedef struct {
int fpu : 1;
int tsc : 1;
int cx8 : 1;
int clfsh : 1;
int mmx : 1;
int aes : 1;
int erms : 1;
int f16c : 1;
int fma4 : 1;
int fma3 : 1;
int vaes : 1;
int vpclmulqdq : 1;
int bmi1 : 1;
int hle : 1;
int bmi2 : 1;
int rtm : 1;
int rdseed : 1;
int clflushopt : 1;
int clwb : 1;
int sse : 1;
int sse2 : 1;
int sse3 : 1;
int ssse3 : 1;
int sse4_1 : 1;
int sse4_2 : 1;
int sse4a : 1;
int avx : 1;
int avx2 : 1;
int avx512f : 1;
int avx512cd : 1;
int avx512er : 1;
int avx512pf : 1;
int avx512bw : 1;
int avx512dq : 1;
int avx512vl : 1;
int avx512ifma : 1;
int avx512vbmi : 1;
int avx512vbmi2 : 1;
int avx512vnni : 1;
int avx512bitalg : 1;
int avx512vpopcntdq : 1;
int avx512_4vnniw : 1;
int avx512_4vbmi2 : 1;
int avx512_second_fma : 1;
int avx512_4fmaps : 1;
int avx512_bf16 : 1;
int avx512_vp2intersect : 1;
int amx_bf16 : 1;
int amx_tile : 1;
int amx_int8 : 1;
int pclmulqdq : 1;
int smx : 1;
int sgx : 1;
int cx16 : 1; // aka. CMPXCHG16B
int sha : 1;
int popcnt : 1;
int movbe : 1;
int rdrnd : 1;
int dca : 1;
int ss : 1;
// Make sure to update X86FeaturesEnum below if you add a field here.
} X86Features;
typedef struct {
X86Features features;
int family;
int model;
int stepping;
char vendor[13]; // 0 terminated string
} X86Info;
// Calls cpuid and returns an initialized X86info.
// This function is guaranteed to be malloc, memset and memcpy free.
X86Info GetX86Info(void);
// Returns cache hierarchy informations.
// Can call cpuid multiple times.
// Only works on Intel CPU at the moment.
// This function is guaranteed to be malloc, memset and memcpy free.
CacheInfo GetX86CacheInfo(void);
typedef enum {
X86_UNKNOWN,
INTEL_CORE, // CORE
INTEL_PNR, // PENRYN
INTEL_NHM, // NEHALEM
INTEL_ATOM_BNL, // BONNELL
INTEL_WSM, // WESTMERE
INTEL_SNB, // SANDYBRIDGE
INTEL_IVB, // IVYBRIDGE
INTEL_ATOM_SMT, // SILVERMONT
INTEL_HSW, // HASWELL
INTEL_BDW, // BROADWELL
INTEL_SKL, // SKYLAKE
INTEL_ATOM_GMT, // GOLDMONT
INTEL_KBL, // KABY LAKE
INTEL_CFL, // COFFEE LAKE
INTEL_WHL, // WHISKEY LAKE
INTEL_CNL, // CANNON LAKE
INTEL_ICL, // ICE LAKE
INTEL_TGL, // TIGER LAKE
INTEL_SPR, // SAPPHIRE RAPIDS
AMD_HAMMER, // K8
AMD_K10, // K10
AMD_BOBCAT, // K14
AMD_BULLDOZER, // K15
AMD_JAGUAR, // K16
AMD_ZEN, // K17
} X86Microarchitecture;
// Returns the underlying microarchitecture by looking at X86Info's vendor,
// family and model.
X86Microarchitecture GetX86Microarchitecture(const X86Info* info);
// Calls cpuid and fills the brand_string.
// - brand_string *must* be of size 49 (beware of array decaying).
// - brand_string will be zero terminated.
// - This function calls memcpy.
void FillX86BrandString(char brand_string[49]);
////////////////////////////////////////////////////////////////////////////////
// Introspection functions
typedef enum {
X86_FPU,
X86_TSC,
X86_CX8,
X86_CLFSH,
X86_MMX,
X86_AES,
X86_ERMS,
X86_F16C,
X86_FMA4,
X86_FMA3,
X86_VAES,
X86_VPCLMULQDQ,
X86_BMI1,
X86_HLE,
X86_BMI2,
X86_RTM,
X86_RDSEED,
X86_CLFLUSHOPT,
X86_CLWB,
X86_SSE,
X86_SSE2,
X86_SSE3,
X86_SSSE3,
X86_SSE4_1,
X86_SSE4_2,
X86_SSE4A,
X86_AVX,
X86_AVX2,
X86_AVX512F,
X86_AVX512CD,
X86_AVX512ER,
X86_AVX512PF,
X86_AVX512BW,
X86_AVX512DQ,
X86_AVX512VL,
X86_AVX512IFMA,
X86_AVX512VBMI,
X86_AVX512VBMI2,
X86_AVX512VNNI,
X86_AVX512BITALG,
X86_AVX512VPOPCNTDQ,
X86_AVX512_4VNNIW,
X86_AVX512_4VBMI2,
X86_AVX512_SECOND_FMA,
X86_AVX512_4FMAPS,
X86_AVX512_BF16,
X86_AVX512_VP2INTERSECT,
X86_AMX_BF16,
X86_AMX_TILE,
X86_AMX_INT8,
X86_PCLMULQDQ,
X86_SMX,
X86_SGX,
X86_CX16,
X86_SHA,
X86_POPCNT,
X86_MOVBE,
X86_RDRND,
X86_DCA,
X86_SS,
X86_LAST_,
} X86FeaturesEnum;
int GetX86FeaturesEnumValue(const X86Features* features, X86FeaturesEnum value);
const char* GetX86FeaturesEnumName(X86FeaturesEnum);
const char* GetX86MicroarchitectureName(X86Microarchitecture);
CPU_FEATURES_END_CPP_NAMESPACE
#if !defined(CPU_FEATURES_ARCH_X86)
#error "Including cpuinfo_x86.h from a non-x86 target."
#endif
#endif // CPU_FEATURES_INCLUDE_CPUINFO_X86_H_