dump1090-fa/wisdom/wisdom.i7-6500u

# model name	: Intel(R) Core(TM) i7-6500U CPU @ 2.50GHz
#
# "performance" cpufreq governor @ 2.50 GHz

# generated by ./starch-benchmark -i 15 -r wisdom.local -o wisdom.local 

magnitude_power_uc8                      twopass_x86_avx2                          # 65331 ns/call
magnitude_power_uc8                      twopass_generic                           # 65363 ns/call
magnitude_power_uc8                      lookup_unroll_4_generic                   # 67147 ns/call
magnitude_power_uc8                      lookup_unroll_4_x86_avx2                  # 67202 ns/call
magnitude_power_uc8                      lookup_generic                            # 74612 ns/call
magnitude_power_uc8                      lookup_x86_avx2                           # 74801 ns/call

magnitude_power_uc8_aligned              twopass_generic                           # 66243 ns/call
magnitude_power_uc8_aligned              twopass_x86_avx2                          # 66258 ns/call
magnitude_power_uc8_aligned              twopass_x86_avx2_aligned                  # 66294 ns/call
magnitude_power_uc8_aligned              lookup_unroll_4_x86_avx2_aligned          # 67621 ns/call
magnitude_power_uc8_aligned              lookup_unroll_4_x86_avx2                  # 67657 ns/call
magnitude_power_uc8_aligned              lookup_unroll_4_generic                   # 67684 ns/call
magnitude_power_uc8_aligned              lookup_generic                            # 75036 ns/call
magnitude_power_uc8_aligned              lookup_x86_avx2_aligned                   # 75191 ns/call
magnitude_power_uc8_aligned              lookup_x86_avx2                           # 75335 ns/call

magnitude_sc16                           exact_float_x86_avx2                      # 256796 ns/call
magnitude_sc16                           exact_u32_x86_avx2                        # 300270 ns/call
magnitude_sc16                           exact_float_generic                       # 1357410 ns/call
magnitude_sc16                           exact_u32_generic                         # 2039745 ns/call

magnitude_sc16_aligned                   exact_float_x86_avx2_aligned              # 225583 ns/call
magnitude_sc16_aligned                   exact_float_x86_avx2                      # 245087 ns/call
magnitude_sc16_aligned                   exact_u32_x86_avx2_aligned                # 265908 ns/call
magnitude_sc16_aligned                   exact_u32_x86_avx2                        # 289047 ns/call
magnitude_sc16_aligned                   exact_float_generic                       # 1345505 ns/call
magnitude_sc16_aligned                   exact_u32_generic                         # 2037905 ns/call

magnitude_sc16q11                        exact_float_x86_avx2                      # 63530 ns/call
magnitude_sc16q11                        exact_u32_x86_avx2                        # 74567 ns/call
magnitude_sc16q11                        exact_float_generic                       # 524297 ns/call
magnitude_sc16q11                        12bit_table_x86_avx2                      # 549772 ns/call
magnitude_sc16q11                        12bit_table_generic                       # 551318 ns/call
magnitude_sc16q11                        11bit_table_generic                       # 612628 ns/call
magnitude_sc16q11                        11bit_table_x86_avx2                      # 612833 ns/call
magnitude_sc16q11                        exact_u32_generic                         # 652008 ns/call

magnitude_sc16q11_aligned                exact_float_x86_avx2_aligned              # 56413 ns/call
magnitude_sc16q11_aligned                exact_float_x86_avx2                      # 61285 ns/call
magnitude_sc16q11_aligned                exact_u32_x86_avx2_aligned                # 66331 ns/call
magnitude_sc16q11_aligned                exact_u32_x86_avx2                        # 72272 ns/call
magnitude_sc16q11_aligned                exact_float_generic                       # 521575 ns/call
magnitude_sc16q11_aligned                12bit_table_x86_avx2                      # 549193 ns/call
magnitude_sc16q11_aligned                12bit_table_generic                       # 549588 ns/call
magnitude_sc16q11_aligned                12bit_table_x86_avx2_aligned              # 570064 ns/call
magnitude_sc16q11_aligned                11bit_table_x86_avx2                      # 616504 ns/call
magnitude_sc16q11_aligned                11bit_table_generic                       # 616961 ns/call
magnitude_sc16q11_aligned                11bit_table_x86_avx2_aligned              # 618931 ns/call
magnitude_sc16q11_aligned                exact_u32_generic                         # 650346 ns/call

magnitude_uc8                            lookup_unroll_4_x86_avx2                  # 53027 ns/call
magnitude_uc8                            lookup_unroll_4_generic                   # 53081 ns/call
magnitude_uc8                            lookup_x86_avx2                           # 53482 ns/call
magnitude_uc8                            lookup_generic                            # 53489 ns/call
magnitude_uc8                            exact_x86_avx2                            # 91623 ns/call
magnitude_uc8                            exact_generic                             # 801481 ns/call

magnitude_uc8_aligned                    lookup_unroll_4_x86_avx2                  # 53313 ns/call
magnitude_uc8_aligned                    lookup_unroll_4_generic                   # 53329 ns/call
magnitude_uc8_aligned                    lookup_unroll_4_x86_avx2_aligned          # 53358 ns/call
magnitude_uc8_aligned                    lookup_x86_avx2                           # 53692 ns/call
magnitude_uc8_aligned                    lookup_x86_avx2_aligned                   # 53790 ns/call
magnitude_uc8_aligned                    lookup_generic                            # 55871 ns/call
magnitude_uc8_aligned                    exact_x86_avx2_aligned                    # 86939 ns/call
magnitude_uc8_aligned                    exact_x86_avx2                            # 89688 ns/call
magnitude_uc8_aligned                    exact_generic                             # 802054 ns/call

mean_power_u16                           u32_x86_avx2                              # 11601 ns/call
mean_power_u16                           u32_generic                               # 18249 ns/call
mean_power_u16                           float_x86_avx2                            # 18556 ns/call
mean_power_u16                           u64_x86_avx2                              # 31297 ns/call
mean_power_u16                           u64_generic                               # 39618 ns/call
mean_power_u16                           float_generic                             # 105649 ns/call

mean_power_u16_aligned                   u32_x86_avx2                              # 11606 ns/call
mean_power_u16_aligned                   u32_x86_avx2_aligned                      # 11609 ns/call
mean_power_u16_aligned                   float_x86_avx2                            # 18231 ns/call
mean_power_u16_aligned                   float_x86_avx2_aligned                    # 18253 ns/call
mean_power_u16_aligned                   u32_generic                               # 18254 ns/call
mean_power_u16_aligned                   u64_x86_avx2_aligned                      # 31282 ns/call
mean_power_u16_aligned                   u64_x86_avx2                              # 31283 ns/call
mean_power_u16_aligned                   u64_generic                               # 39639 ns/call
mean_power_u16_aligned                   float_generic                             # 105615 ns/call

count_above_u16                          generic_x86_avx2                          # 20 ns/call
count_above_u16                          generic_generic                           # 30 ns/call

count_above_u16_aligned                  generic_x86_avx2_aligned                  # 15 ns/call
count_above_u16_aligned                  generic_x86_avx2                          # 19 ns/call
count_above_u16_aligned                  generic_generic                           # 31 ns/call
Move all converters to starch-based implementations (#97) * Switch all conversion routines to use starch. Main user-visible changes: * ensure you check out submodules ('git clone --recurse-submodules') * --version shows the CPU features and DSP implementations in use * --wisdom allows overriding of the built-in architecture wisdom * --dcfilter no longer supported * "starch-benchmark" binary will benchmark all options on the current machine and can produce a wisdom file to feed to the --wisdom option If you have a use case for --dcfilter, please get in touch and let me know - it's an edge case and for now there's no starch/DSP support for it, but support can be written if needed. In almost all cases the new conversion routines are slightly or substantially faster than the old conversion routines. The only case that is slower is SC16/SC16Q11 on a Pi 0, which is around 10% slower due to changing from heavily approximated lookup tables to higher quality results (but SC16 is probably already out of reach of a Pi 0) * No need to build with SC16Q11_TABLE_BITS any more * Add oneoff/uc8_capture_stats (reads a UC8 capture; measures min/max/mean I and Q) * Switch UC8 conversion to 127.4 center, 128 range. Looking at actual UC8 captures from an RTL2832, the mean I and Q are actually at 127.4, so use that as the zero point. This means that the resulting I/Q maximum values could be as large as 127.6. Switch to 128 for simplicity. * Switch to the new UC8 zero offset in benchmarks, fix some bugs * Fix some bugs in SC16/SC16Q11 validation, tighten the max error requirements * Ditch UC8 approximation path, add a NEON VRSQRTE path. * Tweak the SC16 exact path, add a new impl that uses a mix of u32 & floats. 
* SC16Q11 impl tweaks: * add a u32->float exact path * ditch the approximation path * add a NEON VRSQRTE path * add a 12-bit table path (using the full signed I/Q value, not absolute value) * Ditch SC16 approximation path, add NEON vrsqrte path * Add oneoff/dsp_error_measurement This runs sample input through the DSP functions that are allowed to be inexact and dumps the results as a TSV suitable for feeding to gnuplot to look at the actual errors. * Update make clean, make wisdom targets * Update wisdom based on benchmarking * Preserve the raw wisdom benchmark data * Update to latest starch * Update .gitignore for new wisdom files * Update starch generated code * Build starch-benchmark as part of the 'all' target * Use wisdom from /etc/dump1090-fa/wisdom.local if present * Package starch-benchmark and a helper script to generate local wisdom data * Remove submodules in preparation for importing them directly * Import cpu_features v0.6.0 from https://github.com/google/cpu_features/releases/tag/v0.6.0 * Import starch at commit a725c8491dc33a321565d451b385131e589d8490 from https://github.com/flightaware/starch 2021-01-21 11:45:00 +00:00			`# model name : Intel(R) Core(TM) i7-6500U CPU @ 2.50GHz`
			`#`
			`# "performance" cpufreq governor @ 2.50 GHz`

			`# generated by ./starch-benchmark -i 15 -r wisdom.local -o wisdom.local`

			`magnitude_power_uc8 twopass_x86_avx2 # 65331 ns/call`
			`magnitude_power_uc8 twopass_generic # 65363 ns/call`
			`magnitude_power_uc8 lookup_unroll_4_generic # 67147 ns/call`
			`magnitude_power_uc8 lookup_unroll_4_x86_avx2 # 67202 ns/call`
			`magnitude_power_uc8 lookup_generic # 74612 ns/call`
			`magnitude_power_uc8 lookup_x86_avx2 # 74801 ns/call`

			`magnitude_power_uc8_aligned twopass_generic # 66243 ns/call`
			`magnitude_power_uc8_aligned twopass_x86_avx2 # 66258 ns/call`
			`magnitude_power_uc8_aligned twopass_x86_avx2_aligned # 66294 ns/call`
			`magnitude_power_uc8_aligned lookup_unroll_4_x86_avx2_aligned # 67621 ns/call`
			`magnitude_power_uc8_aligned lookup_unroll_4_x86_avx2 # 67657 ns/call`
			`magnitude_power_uc8_aligned lookup_unroll_4_generic # 67684 ns/call`
			`magnitude_power_uc8_aligned lookup_generic # 75036 ns/call`
			`magnitude_power_uc8_aligned lookup_x86_avx2_aligned # 75191 ns/call`
			`magnitude_power_uc8_aligned lookup_x86_avx2 # 75335 ns/call`

			`magnitude_sc16 exact_float_x86_avx2 # 256796 ns/call`
			`magnitude_sc16 exact_u32_x86_avx2 # 300270 ns/call`
			`magnitude_sc16 exact_float_generic # 1357410 ns/call`
			`magnitude_sc16 exact_u32_generic # 2039745 ns/call`

			`magnitude_sc16_aligned exact_float_x86_avx2_aligned # 225583 ns/call`
			`magnitude_sc16_aligned exact_float_x86_avx2 # 245087 ns/call`
			`magnitude_sc16_aligned exact_u32_x86_avx2_aligned # 265908 ns/call`
			`magnitude_sc16_aligned exact_u32_x86_avx2 # 289047 ns/call`
			`magnitude_sc16_aligned exact_float_generic # 1345505 ns/call`
			`magnitude_sc16_aligned exact_u32_generic # 2037905 ns/call`

			`magnitude_sc16q11 exact_float_x86_avx2 # 63530 ns/call`
			`magnitude_sc16q11 exact_u32_x86_avx2 # 74567 ns/call`
			`magnitude_sc16q11 exact_float_generic # 524297 ns/call`
			`magnitude_sc16q11 12bit_table_x86_avx2 # 549772 ns/call`
			`magnitude_sc16q11 12bit_table_generic # 551318 ns/call`
			`magnitude_sc16q11 11bit_table_generic # 612628 ns/call`
			`magnitude_sc16q11 11bit_table_x86_avx2 # 612833 ns/call`
			`magnitude_sc16q11 exact_u32_generic # 652008 ns/call`

			`magnitude_sc16q11_aligned exact_float_x86_avx2_aligned # 56413 ns/call`
			`magnitude_sc16q11_aligned exact_float_x86_avx2 # 61285 ns/call`
			`magnitude_sc16q11_aligned exact_u32_x86_avx2_aligned # 66331 ns/call`
			`magnitude_sc16q11_aligned exact_u32_x86_avx2 # 72272 ns/call`
			`magnitude_sc16q11_aligned exact_float_generic # 521575 ns/call`
			`magnitude_sc16q11_aligned 12bit_table_x86_avx2 # 549193 ns/call`
			`magnitude_sc16q11_aligned 12bit_table_generic # 549588 ns/call`
			`magnitude_sc16q11_aligned 12bit_table_x86_avx2_aligned # 570064 ns/call`
			`magnitude_sc16q11_aligned 11bit_table_x86_avx2 # 616504 ns/call`
			`magnitude_sc16q11_aligned 11bit_table_generic # 616961 ns/call`
			`magnitude_sc16q11_aligned 11bit_table_x86_avx2_aligned # 618931 ns/call`
			`magnitude_sc16q11_aligned exact_u32_generic # 650346 ns/call`

			`magnitude_uc8 lookup_unroll_4_x86_avx2 # 53027 ns/call`
			`magnitude_uc8 lookup_unroll_4_generic # 53081 ns/call`
			`magnitude_uc8 lookup_x86_avx2 # 53482 ns/call`
			`magnitude_uc8 lookup_generic # 53489 ns/call`
			`magnitude_uc8 exact_x86_avx2 # 91623 ns/call`
			`magnitude_uc8 exact_generic # 801481 ns/call`

			`magnitude_uc8_aligned lookup_unroll_4_x86_avx2 # 53313 ns/call`
			`magnitude_uc8_aligned lookup_unroll_4_generic # 53329 ns/call`
			`magnitude_uc8_aligned lookup_unroll_4_x86_avx2_aligned # 53358 ns/call`
			`magnitude_uc8_aligned lookup_x86_avx2 # 53692 ns/call`
			`magnitude_uc8_aligned lookup_x86_avx2_aligned # 53790 ns/call`
			`magnitude_uc8_aligned lookup_generic # 55871 ns/call`
			`magnitude_uc8_aligned exact_x86_avx2_aligned # 86939 ns/call`
			`magnitude_uc8_aligned exact_x86_avx2 # 89688 ns/call`
			`magnitude_uc8_aligned exact_generic # 802054 ns/call`

			`mean_power_u16 u32_x86_avx2 # 11601 ns/call`
			`mean_power_u16 u32_generic # 18249 ns/call`
			`mean_power_u16 float_x86_avx2 # 18556 ns/call`
			`mean_power_u16 u64_x86_avx2 # 31297 ns/call`
			`mean_power_u16 u64_generic # 39618 ns/call`
			`mean_power_u16 float_generic # 105649 ns/call`

			`mean_power_u16_aligned u32_x86_avx2 # 11606 ns/call`
			`mean_power_u16_aligned u32_x86_avx2_aligned # 11609 ns/call`
			`mean_power_u16_aligned float_x86_avx2 # 18231 ns/call`
			`mean_power_u16_aligned float_x86_avx2_aligned # 18253 ns/call`
			`mean_power_u16_aligned u32_generic # 18254 ns/call`
			`mean_power_u16_aligned u64_x86_avx2_aligned # 31282 ns/call`
			`mean_power_u16_aligned u64_x86_avx2 # 31283 ns/call`
			`mean_power_u16_aligned u64_generic # 39639 ns/call`
			`mean_power_u16_aligned float_generic # 105615 ns/call`
Use a starch implementation for the burst-detection sample counting loop. 2021-07-08 10:53:02 +00:00
			`count_above_u16 generic_x86_avx2 # 20 ns/call`
			`count_above_u16 generic_generic # 30 ns/call`

			`count_above_u16_aligned generic_x86_avx2_aligned # 15 ns/call`
			`count_above_u16_aligned generic_x86_avx2 # 19 ns/call`
			`count_above_u16_aligned generic_generic # 31 ns/call`