Go to the documentation of this file.
16#ifndef HIGHWAY_HWY_DETECT_TARGETS_H_
17#define HIGHWAY_HWY_DETECT_TARGETS_H_
// Target bitfield: every instruction-set target owns exactly one bit.
// HWY_STATIC_TARGET selects the lowest set bit of the enabled baseline, so a
// target with a lower bit position takes precedence over one with a higher
// bit position.
//
// NOTE(review): each HWY_HIGHEST_TARGET_BIT_* value below is a bit index, not
// a mask; presumably it bounds the bit range reserved for that architecture -
// confirm against the code that consumes it.

// x86 targets.
#define HWY_AVX3_DL (1LL << 7)
#define HWY_AVX3 (1LL << 8)
#define HWY_AVX2 (1LL << 9)
#define HWY_SSE4 (1LL << 11)
#define HWY_SSSE3 (1LL << 12)
#define HWY_HIGHEST_TARGET_BIT_X86 14

// Arm targets (SVE variants and NEON).
#define HWY_SVE2_128 (1LL << 24)
#define HWY_SVE_256 (1LL << 25)
#define HWY_SVE2 (1LL << 26)
#define HWY_SVE (1LL << 27)
#define HWY_NEON (1LL << 28)
#define HWY_HIGHEST_TARGET_BIT_ARM 29

// RISC-V vector target.
#define HWY_RVV (1LL << 37)
#define HWY_HIGHEST_TARGET_BIT_RVV 38

// POWER target.
#define HWY_PPC8 (1LL << 49)
#define HWY_HIGHEST_TARGET_BIT_PPC 51

// WebAssembly targets.
#define HWY_WASM_EMU256 (1LL << 58)
#define HWY_WASM (1LL << 59)
#define HWY_HIGHEST_TARGET_BIT_WASM 60

// Fallback targets; they use the highest bits and therefore rank last.
#define HWY_EMU128 (1LL << 61)
#define HWY_SCALAR (1LL << 62)
#define HWY_HIGHEST_TARGET_BIT_SCALAR 62
// Bitfield of targets to exclude on request. A definition supplied before this
// point (e.g. by the build) is kept; otherwise nothing is disabled.
#ifndef HWY_DISABLED_TARGETS
#define HWY_DISABLED_TARGETS 0
#endif
// Bitfield of targets excluded because of toolchain/platform problems. Only
// defined here when not already predefined. Exactly one of the definitions
// below is meant to take effect.
// NOTE(review): this listing carries fused line numbers and has lost its
// #else/#endif lines as well as the condition that guards the second
// definition (listing line 144). Restore them from the original header before
// compiling.
131#ifndef HWY_BROKEN_TARGETS
// x86 with HWY_COMPILER_CLANG below 700 (the message below calls this
// "Clang <= 6"): exclude SSE4 and all AVX targets.
135#if HWY_ARCH_X86 && (HWY_COMPILER_CLANG != 0 && HWY_COMPILER_CLANG < 700)
136#define HWY_BROKEN_TARGETS (HWY_SSE4 | HWY_AVX2 | HWY_AVX3 | HWY_AVX3_DL)
// Emit a build message unless the user already opted in to scalar-only.
139#if !defined(HWY_COMPILE_ONLY_SCALAR)
140#pragma message("x86 Clang <= 6: define HWY_COMPILE_ONLY_SCALAR or upgrade.")
// NOTE(review): the condition selecting this AVX2/AVX3-only exclusion is not
// visible in the listing.
145#define HWY_BROKEN_TARGETS (HWY_AVX2 | HWY_AVX3 | HWY_AVX3_DL)
// MSVC: exclude both AVX3 targets.
148#elif HWY_COMPILER_MSVC != 0
149#define HWY_BROKEN_TARGETS (HWY_AVX3 | HWY_AVX3_DL)
// Big-endian Armv7: exclude NEON.
152#elif HWY_ARCH_ARM_V7 && \
153 (defined(__ARM_BIG_ENDIAN) || \
154 (defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN))
155#define HWY_BROKEN_TARGETS (HWY_NEON)
// HWY_COMPILER_CLANG below 1100 or HWY_COMPILER_GCC_ACTUAL below 1000:
// exclude every SVE target.
158#elif (HWY_COMPILER_CLANG && HWY_COMPILER_CLANG < 1100) || \
159 (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1000)
160#define HWY_BROKEN_TARGETS (HWY_SVE | HWY_SVE2 | HWY_SVE_256 | HWY_SVE2_128)
// Fallback: no target is excluded.
163#define HWY_BROKEN_TARGETS 0
// Filters a target bitfield: returns `targets` with every bit cleared that is
// set in HWY_DISABLED_TARGETS or HWY_BROKEN_TARGETS.
#define HWY_ENABLED(targets) \
  ((targets) & ~((HWY_DISABLED_TARGETS) | (HWY_BROKEN_TARGETS)))
// HWY_BROKEN_EMU128 is 1 when the EMU128 target must be avoided: either
// HWY_COMPILER_GCC_ACTUAL is nonzero and below 1203, or HWY_NO_LIBCXX is
// defined. A value predefined by the user is respected.
#if !defined(HWY_BROKEN_EMU128)
#if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1203) || \
    defined(HWY_NO_LIBCXX)
#define HWY_BROKEN_EMU128 1
#else
#define HWY_BROKEN_EMU128 0
#endif
#endif  // HWY_BROKEN_EMU128
// Fallback baseline target: HWY_SCALAR when scalar-only was requested or
// EMU128 is flagged as broken, otherwise HWY_EMU128.
#if defined(HWY_COMPILE_ONLY_SCALAR) || HWY_BROKEN_EMU128
#define HWY_BASELINE_SCALAR HWY_SCALAR
#else
#define HWY_BASELINE_SCALAR HWY_EMU128
#endif
// WebAssembly baseline: requires HWY_ARCH_WASM and __wasm_simd128__. With
// HWY_WANT_WASM2 defined the HWY_WASM_EMU256 target is chosen, otherwise
// HWY_WASM. Without WASM SIMD the baseline contribution is 0.
#if HWY_ARCH_WASM && defined(__wasm_simd128__)
#if defined(HWY_WANT_WASM2)
#define HWY_BASELINE_WASM HWY_WASM_EMU256
#else
#define HWY_BASELINE_WASM HWY_WASM
#endif  // HWY_WANT_WASM2
#else
#define HWY_BASELINE_WASM 0
#endif
// POWER baseline. The trailing `&& 0` makes the condition always false, so
// HWY_BASELINE_PPC8 is currently always 0 regardless of __VSX__.
#if HWY_ARCH_PPC && defined(__VSX__) && 0
#define HWY_BASELINE_PPC8 HWY_PPC8
#else
#define HWY_BASELINE_PPC8 0
#endif
// Arm baselines default to 0 and are replaced below when the compiler
// advertises the corresponding feature macro.
#define HWY_BASELINE_SVE2 0
#define HWY_BASELINE_SVE 0
#define HWY_BASELINE_NEON 0

// NOTE(review): listing lines 222-224 were not captured; if the original
// header wraps the checks below in an architecture guard, restore it (and its
// #endif) here.

// SVE2: pick the fixed 128-bit variant only when the compiler reports a
// vector length of exactly 128 bits.
#if defined(__ARM_FEATURE_SVE2)
#undef HWY_BASELINE_SVE2
#if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 128
#define HWY_BASELINE_SVE2 HWY_SVE2_128
#else
#define HWY_BASELINE_SVE2 HWY_SVE2
#endif  // __ARM_FEATURE_SVE_BITS
#endif  // __ARM_FEATURE_SVE2

// SVE: pick the fixed 256-bit variant only when the compiler reports a
// vector length of exactly 256 bits.
#if defined(__ARM_FEATURE_SVE)
#undef HWY_BASELINE_SVE
#if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 256
#define HWY_BASELINE_SVE HWY_SVE_256
#else
#define HWY_BASELINE_SVE HWY_SVE
#endif  // __ARM_FEATURE_SVE_BITS
#endif  // __ARM_FEATURE_SVE

// NEON: either spelling of the feature macro is accepted.
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#undef HWY_BASELINE_NEON
#define HWY_BASELINE_NEON HWY_NEON
#endif
// First set of HWY_CHECK_* definitions: SSSE3/SSE4 are defined as a pair,
// either both 1 or both 0, and the PCLMUL_AES, BMI2_FMA and F16C checks are
// unconditionally 1.
// NOTE(review): the conditions that choose between the 1/1 and 0/0 pairs, and
// that separate this set from the macro-driven definitions that follow, are
// missing from this listing (it also carries fused line numbers). Restore them
// from the original header; as shown, the macros would be redefined.
264#define HWY_CHECK_SSSE3 1
265#define HWY_CHECK_SSE4 1
267#define HWY_CHECK_SSSE3 0
268#define HWY_CHECK_SSE4 0
273#define HWY_CHECK_PCLMUL_AES 1
274#define HWY_CHECK_BMI2_FMA 1
275#define HWY_CHECK_F16C 1
// HWY_CHECK_* derived from the compiler's predefined feature macros. Each
// evaluates to 1 when the requirement is met and 0 otherwise.

#if defined(__SSSE3__)
#define HWY_CHECK_SSSE3 1
#else
#define HWY_CHECK_SSSE3 0
#endif

// SSE4 needs both SSE4.1 and SSE4.2.
#if defined(__SSE4_1__) && defined(__SSE4_2__)
#define HWY_CHECK_SSE4 1
#else
#define HWY_CHECK_SSE4 0
#endif

// For the three checks below, defining the matching HWY_DISABLE_* macro makes
// the check pass even if the compiler does not advertise the feature.
#if defined(HWY_DISABLE_PCLMUL_AES) || (defined(__PCLMUL__) && defined(__AES__))
#define HWY_CHECK_PCLMUL_AES 1
#else
#define HWY_CHECK_PCLMUL_AES 0
#endif

#if defined(HWY_DISABLE_BMI2_FMA) || (defined(__BMI2__) && defined(__FMA__))
#define HWY_CHECK_BMI2_FMA 1
#else
#define HWY_CHECK_BMI2_FMA 0
#endif

#if defined(HWY_DISABLE_F16C) || defined(__F16C__)
#define HWY_CHECK_F16C 1
#else
#define HWY_CHECK_F16C 0
#endif
// SSSE3 baseline: x86 only, either requested via HWY_WANT_SSSE3 or detected.
#if HWY_ARCH_X86 && (HWY_WANT_SSSE3 || HWY_CHECK_SSSE3)
#define HWY_BASELINE_SSSE3 HWY_SSSE3
#else
#define HWY_BASELINE_SSSE3 0
#endif

// SSE4 baseline: x86 only, either requested via HWY_WANT_SSE4 or detected
// together with the PCLMUL/AES check.
#if HWY_ARCH_X86 && (HWY_WANT_SSE4 || (HWY_CHECK_SSE4 && HWY_CHECK_PCLMUL_AES))
#define HWY_BASELINE_SSE4 HWY_SSE4
#else
#define HWY_BASELINE_SSE4 0
#endif
// AVX2 baseline: requires the SSE4 baseline plus the BMI2/FMA and F16C checks.
// NOTE(review): the condition ends in a line continuation, but the continued
// line (listing line 325) is missing, as are #else/#endif. Presumably it tests
// the compiler's AVX2 feature macro - restore it from the original header.
324#if HWY_BASELINE_SSE4 != 0 && HWY_CHECK_BMI2_FMA && HWY_CHECK_F16C && \
326#define HWY_BASELINE_AVX2 HWY_AVX2
328#define HWY_BASELINE_AVX2 0
// AVX3 baseline: requires the AVX2 baseline plus the AVX-512 F, BW, DQ and VL
// feature macros.
#if HWY_BASELINE_AVX2 != 0 && defined(__AVX512F__) && defined(__AVX512BW__) && \
    defined(__AVX512DQ__) && defined(__AVX512VL__)
#define HWY_BASELINE_AVX3 HWY_AVX3
#else
#define HWY_BASELINE_AVX3 0
#endif
// AVX3_DL baseline: requires the AVX3 baseline plus all of the AVXVNNI, VAES,
// VPCLMULQDQ, AVX512VBMI, AVX512VBMI2, AVX512VPOPCNTDQ and AVX512BITALG
// feature macros.
#if HWY_BASELINE_AVX3 != 0 && defined(__AVXVNNI__) && defined(__VAES__) && \
    defined(__VPCLMULQDQ__) && defined(__AVX512VBMI__) &&                  \
    defined(__AVX512VBMI2__) && defined(__AVX512VPOPCNTDQ__) &&            \
    defined(__AVX512BITALG__)
#define HWY_BASELINE_AVX3_DL HWY_AVX3_DL
#else
#define HWY_BASELINE_AVX3_DL 0
#endif
// RISC-V vector baseline: requires HWY_ARCH_RVV and the __riscv_vector macro.
#if HWY_ARCH_RVV && defined(__riscv_vector)
#define HWY_BASELINE_RVV HWY_RVV
#else
#define HWY_BASELINE_RVV 0
#endif
// Union of all per-architecture baseline contributions. A definition supplied
// before this point overrides the detected set.
#ifndef HWY_BASELINE_TARGETS
#define HWY_BASELINE_TARGETS                                     \
  (HWY_BASELINE_SCALAR | HWY_BASELINE_WASM | HWY_BASELINE_PPC8 | \
   HWY_BASELINE_SVE2 | HWY_BASELINE_SVE | HWY_BASELINE_NEON |    \
   HWY_BASELINE_SSSE3 | HWY_BASELINE_SSE4 | HWY_BASELINE_AVX2 |  \
   HWY_BASELINE_AVX3 | HWY_BASELINE_AVX3_DL | HWY_BASELINE_RVV)
#endif  // HWY_BASELINE_TARGETS
// Baseline targets that survive HWY_ENABLED, i.e. with disabled and broken
// targets removed. The build is rejected when nothing remains.
// NOTE(review): the #endif closing this check is missing from the listing
// (which also carries fused line numbers); restore it before compiling.
367#define HWY_ENABLED_BASELINE HWY_ENABLED(HWY_BASELINE_TARGETS)
368#if HWY_ENABLED_BASELINE == 0
369#error "At least one baseline target must be defined and enabled"
// Static target: `x & -x` isolates the lowest set bit of the enabled baseline,
// so exactly one target bit is selected.
#define HWY_STATIC_TARGET (HWY_ENABLED_BASELINE & -HWY_ENABLED_BASELINE)

// HWY_TARGET initially refers to the static target.
#define HWY_TARGET HWY_STATIC_TARGET
// The HWY_COMPILE_ONLY_* switches are mutually exclusive: each defined()
// contributes 1 to the sum, and more than one is rejected at compile time.
#if 1 < (defined(HWY_COMPILE_ONLY_SCALAR) + defined(HWY_COMPILE_ONLY_EMU128) + \
         defined(HWY_COMPILE_ONLY_STATIC))
#error "Can only define one of HWY_COMPILE_ONLY_{SCALAR|EMU128|STATIC} - bug?"
#endif
// HWY_HAVE_RUNTIME_DISPATCH: 1 when targets can be selected at runtime on this
// platform, 0 otherwise.
// NOTE(review): the opening #if that guards the first definition (listing
// line 392) and the #else/#endif lines are missing from this listing; restore
// them from the original header.
393#define HWY_HAVE_RUNTIME_DISPATCH 1
// Arm: requires GCC, Linux, and that TOOLCHAIN_MISS_SYS_AUXV_H is not defined.
396#elif HWY_ARCH_ARM && HWY_COMPILER_GCC_ACTUAL && HWY_OS_LINUX && !defined(TOOLCHAIN_MISS_SYS_AUXV_H)
397#define HWY_HAVE_RUNTIME_DISPATCH 1
// Fallback: no runtime dispatch.
399#define HWY_HAVE_RUNTIME_DISPATCH 0
// AVX3_DL joins the attainable set only when the user opts in via
// HWY_WANT_AVX3_DL or when it is already part of the enabled baseline.
// The baseline test uses HWY_ENABLED_BASELINE, matching the SVE checks; an
// undefined identifier in #if evaluates to 0, which would silently disable
// this clause.
#if defined(HWY_WANT_AVX3_DL) || (HWY_ENABLED_BASELINE & HWY_AVX3_DL)
#define HWY_ATTAINABLE_AVX3_DL HWY_AVX3_DL
#else
#define HWY_ATTAINABLE_AVX3_DL 0
#endif
// SVE is attainable on AArch64 when runtime dispatch is available or an SVE
// variant is already in the enabled baseline. The result is filtered through
// HWY_ENABLED so disabled/broken variants drop out.
#if HWY_ARCH_ARM_A64 && (HWY_HAVE_RUNTIME_DISPATCH || \
                         (HWY_ENABLED_BASELINE & (HWY_SVE | HWY_SVE_256)))
#define HWY_ATTAINABLE_SVE HWY_ENABLED(HWY_SVE | HWY_SVE_256)
#else
#define HWY_ATTAINABLE_SVE 0
#endif
// SVE2 is attainable on AArch64 when runtime dispatch is available or an SVE2
// variant is already in the enabled baseline. The result is filtered through
// HWY_ENABLED so disabled/broken variants drop out.
#if HWY_ARCH_ARM_A64 && (HWY_HAVE_RUNTIME_DISPATCH || \
                         (HWY_ENABLED_BASELINE & (HWY_SVE2 | HWY_SVE2_128)))
#define HWY_ATTAINABLE_SVE2 HWY_ENABLED(HWY_SVE2 | HWY_SVE2_128)
#else
#define HWY_ATTAINABLE_SVE2 0
#endif
// HWY_ATTAINABLE_TARGETS: bitfield of targets that may be generated for this
// platform. Three alternative definitions follow:
//   1) the scalar baseline plus SSSE3, SSE4, AVX2, AVX3 and (if attainable)
//      AVX3_DL, filtered through HWY_ENABLED;
//   2) Arm with runtime dispatch: the scalar baseline plus NEON and the
//      attainable SVE sets, filtered through HWY_ENABLED;
//   3) otherwise: the enabled baseline plus the attainable SVE/SVE2 sets.
// NOTE(review): this listing lost the opening #if of definition 1 (listing
// line 427), the final continuation line of definition 2 (listing line 434),
// and the #else/#endif lines. Restore them from the original header before
// compiling.
428#define HWY_ATTAINABLE_TARGETS \
429 HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_SSSE3 | HWY_SSE4 | HWY_AVX2 | \
430 HWY_AVX3 | HWY_ATTAINABLE_AVX3_DL)
431#elif HWY_ARCH_ARM && HWY_HAVE_RUNTIME_DISPATCH
432#define HWY_ATTAINABLE_TARGETS \
433 HWY_ENABLED(HWY_BASELINE_SCALAR | HWY_NEON | HWY_ATTAINABLE_SVE | \
436#define HWY_ATTAINABLE_TARGETS \
437 (HWY_ENABLED_BASELINE | HWY_ATTAINABLE_SVE | HWY_ATTAINABLE_SVE2)
// Selects HWY_TARGETS, the set of targets to generate code for, and in the
// first two branches also overrides HWY_STATIC_TARGET.
// NOTE(review): the #else line, the #define line that the final parenthesized
// expression belongs to (presumably HWY_TARGETS), and the closing #endif are
// missing from this listing. Restore them from the original header.

// EMU128 only, provided EMU128 is not flagged as broken.
441#if defined(HWY_COMPILE_ONLY_EMU128) && !HWY_BROKEN_EMU128
442#undef HWY_STATIC_TARGET
443#define HWY_STATIC_TARGET HWY_EMU128
444#define HWY_TARGETS HWY_EMU128
// Scalar only: requested explicitly, or EMU128 was requested but is broken.
448#elif defined(HWY_COMPILE_ONLY_SCALAR) || \
449 (defined(HWY_COMPILE_ONLY_EMU128) && HWY_BROKEN_EMU128)
450#undef HWY_STATIC_TARGET
451#define HWY_STATIC_TARGET HWY_SCALAR
452#define HWY_TARGETS HWY_SCALAR
// Static target only.
455#elif defined(HWY_COMPILE_ONLY_STATIC)
456#define HWY_TARGETS HWY_STATIC_TARGET
// Everything attainable: requested explicitly or when building tests.
459#elif defined(HWY_COMPILE_ALL_ATTAINABLE) || defined(HWY_IS_TEST)
460#define HWY_TARGETS HWY_ATTAINABLE_TARGETS
// Remaining case: the mask keeps the static target's bit and every lower bit,
// i.e. attainable targets at or below the static target's bit position.
468 (HWY_ATTAINABLE_TARGETS & ((HWY_STATIC_TARGET - 1LL) | HWY_STATIC_TARGET))
// Consistency check: the static target must be part of HWY_TARGETS, otherwise
// compilation is aborted.
// NOTE(review): the #endif closing this check is not visible in the listing
// (which also carries fused line numbers); restore it before compiling.
475#if (HWY_TARGETS & HWY_STATIC_TARGET) == 0
476#error "Logic error: best baseline should be included in dynamic targets"