Grok
10.0.5
src
lib
core
highway
hwy
ops
set_macros-inl.h
Go to the documentation of this file.
// Copyright 2020 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Sets macros based on HWY_TARGET.

// This include guard is toggled by foreach_target, so avoid the usual _H_
// suffix to prevent copybara from renaming it.
//
// The guard macro is flipped whenever its defined-ness equals that of
// HWY_TARGET_TOGGLE: it becomes defined if it was undefined and vice versa.
// Note that the conditional closes immediately, so it does not enclose the
// remainder of this file.
#if defined(HWY_SET_MACROS_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
#ifdef HWY_SET_MACROS_PER_TARGET
#undef HWY_SET_MACROS_PER_TARGET
#else
#define HWY_SET_MACROS_PER_TARGET
#endif
#endif  // HWY_SET_MACROS_PER_TARGET

#include "
hwy/detect_targets.h
"
30
31
// Clear all per-target macros; they are defined again below according to the
// current HWY_TARGET.
#undef HWY_NAMESPACE
#undef HWY_ALIGN
#undef HWY_MAX_BYTES
#undef HWY_LANES

#undef HWY_HAVE_SCALABLE
#undef HWY_HAVE_INTEGER64
#undef HWY_HAVE_FLOAT16
#undef HWY_HAVE_FLOAT64
#undef HWY_MEM_OPS_MIGHT_FAULT
#undef HWY_NATIVE_FMA
#undef HWY_CAP_GE256
#undef HWY_CAP_GE512

#undef HWY_TARGET_STR

// Optional x86 feature lists. Each expands to an empty string when the
// corresponding HWY_DISABLE_* macro is defined, otherwise to a comma-prefixed
// list that is appended to the target strings below.
#if defined(HWY_DISABLE_PCLMUL_AES)
#define HWY_TARGET_STR_PCLMUL_AES ""
#else
#define HWY_TARGET_STR_PCLMUL_AES ",pclmul,aes"
#endif

#if defined(HWY_DISABLE_BMI2_FMA)
#define HWY_TARGET_STR_BMI2_FMA ""
#else
#define HWY_TARGET_STR_BMI2_FMA ",bmi,bmi2,fma"
#endif

#if defined(HWY_DISABLE_F16C)
#define HWY_TARGET_STR_F16C ""
#else
#define HWY_TARGET_STR_F16C ",f16c"
#endif

// Cumulative x86 target strings, built by adjacent string-literal
// concatenation: each one starts with the string of the preceding target.
#define HWY_TARGET_STR_SSSE3 "sse2,ssse3"

#define HWY_TARGET_STR_SSE4 \
  HWY_TARGET_STR_SSSE3 ",sse4.1,sse4.2" HWY_TARGET_STR_PCLMUL_AES
// Include previous targets, which are the half-vectors of the next target.
#define HWY_TARGET_STR_AVX2 \
  HWY_TARGET_STR_SSE4 ",avx,avx2" HWY_TARGET_STR_BMI2_FMA HWY_TARGET_STR_F16C
#define HWY_TARGET_STR_AVX3 \
  HWY_TARGET_STR_AVX2 ",avx512f,avx512vl,avx512dq,avx512bw"

// The definitions below are outside the include guard (which is closed above)
// so that HWY_TARGET_STR is redefined on each include, governed by the current
// HWY_TARGET.

//-----------------------------------------------------------------------------
// SSSE3
// First branch of the HWY_TARGET conditional chain; the chain continues with
// #elif for the other targets.
#if HWY_TARGET == HWY_SSSE3

#define HWY_NAMESPACE N_SSSE3
// 16-byte vectors.
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_SSSE3

//-----------------------------------------------------------------------------
// SSE4
#elif HWY_TARGET == HWY_SSE4

#define HWY_NAMESPACE N_SSE4
// 16-byte vectors.
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_SSE4

//-----------------------------------------------------------------------------
// AVX2
#elif HWY_TARGET == HWY_AVX2

#define HWY_NAMESPACE N_AVX2
// 32-byte vectors.
#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1

// HWY_DISABLE_BMI2_FMA also removes "fma" from the target string, so native
// FMA is only advertised when that macro is not defined.
#ifdef HWY_DISABLE_BMI2_FMA
#define HWY_NATIVE_FMA 0
#else
#define HWY_NATIVE_FMA 1
#endif

#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_AVX2

//-----------------------------------------------------------------------------
// AVX3[_DL]
#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL

// Settings shared by both AVX3 variants (64-byte vectors).
#define HWY_ALIGN alignas(64)
#define HWY_MAX_BYTES 64
#define HWY_LANES(T) (64 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 1

// Only the namespace and the target string differ between the two variants.
#if HWY_TARGET == HWY_AVX3

#define HWY_NAMESPACE N_AVX3
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3

#elif HWY_TARGET == HWY_AVX3_DL

#define HWY_NAMESPACE N_AVX3_DL
// Uses "avx512vnni" (the AVX-512 form of VNNI), not "avxvnni": the latter
// names the separate VEX-encoded AVX-VNNI extension, which does not belong
// with the other AVX-512 features listed here and is rejected as an unknown
// target option by older compilers.
#define HWY_TARGET_STR                                                  \
  HWY_TARGET_STR_AVX3                                                   \
  ",vpclmulqdq,avx512vbmi,avx512vbmi2,vaes,avx512vnni,avx512bitalg,"    \
  "avx512vpopcntdq"

#else
// Unreachable: the enclosing #elif only admits the two targets handled above.
#error "Logic error"
#endif  // HWY_TARGET == HWY_AVX3_DL

//-----------------------------------------------------------------------------
// PPC8
#elif HWY_TARGET == HWY_PPC8

// 16-byte vectors.
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 0
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_PPC8

#define HWY_TARGET_STR "altivec,vsx"

//-----------------------------------------------------------------------------
// NEON
#elif HWY_TARGET == HWY_NEON

// 16-byte vectors.
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1

// 64-bit float lanes are only enabled when HWY_ARCH_ARM_A64 is nonzero.
#if HWY_ARCH_ARM_A64
#define HWY_HAVE_FLOAT64 1
#else
#define HWY_HAVE_FLOAT64 0
#endif

#define HWY_MEM_OPS_MIGHT_FAULT 1

// Native FMA requires either the compiler-defined __ARM_VFPV4__ or A64.
#if defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
#define HWY_NATIVE_FMA 1
#else
#define HWY_NATIVE_FMA 0
#endif

#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_NEON

// Can use pragmas instead of -march compiler flag
#if HWY_HAVE_RUNTIME_DISPATCH
#if HWY_ARCH_ARM_V7
#define HWY_TARGET_STR "+neon-vfpv4"
#else
#define HWY_TARGET_STR "+crypto"
#endif  // HWY_ARCH_ARM_V7
#else
// HWY_TARGET_STR remains undefined
#endif

//-----------------------------------------------------------------------------
// SVE[2]
#elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE || \
    HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128

// SVE only requires lane alignment, not natural alignment of the entire vector.
#define HWY_ALIGN alignas(8)

// Value ensures MaxLanes() is the tightest possible upper bound to reduce
// overallocation.
// HWY_MAX_BYTES is defined per variant further below; this is fine because it
// is only expanded when HWY_LANES is used.
#define HWY_LANES(T) ((HWY_MAX_BYTES) / sizeof(T))

#define HWY_HAVE_SCALABLE 1
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

// Per-variant namespace and maximum vector size; the final #else is HWY_SVE.
#if HWY_TARGET == HWY_SVE2
#define HWY_NAMESPACE N_SVE2
#define HWY_MAX_BYTES 256
#elif HWY_TARGET == HWY_SVE_256
#define HWY_NAMESPACE N_SVE_256
#define HWY_MAX_BYTES 32
#elif HWY_TARGET == HWY_SVE2_128
#define HWY_NAMESPACE N_SVE2_128
#define HWY_MAX_BYTES 16
#else
#define HWY_NAMESPACE N_SVE
#define HWY_MAX_BYTES 256
#endif

// Can use pragmas instead of -march compiler flag
#if HWY_HAVE_RUNTIME_DISPATCH
#if HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE2_128
#define HWY_TARGET_STR "+sve2-aes"
#else
#define HWY_TARGET_STR "+sve"
#endif
#else
// HWY_TARGET_STR remains undefined
#endif

//-----------------------------------------------------------------------------
// WASM
#elif HWY_TARGET == HWY_WASM

// 16-byte vectors.
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 0
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_WASM

#define HWY_TARGET_STR "simd128"

//-----------------------------------------------------------------------------
// WASM_EMU256
#elif HWY_TARGET == HWY_WASM_EMU256

// 32-byte vectors; the target string is the same "simd128" as for HWY_WASM.
#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 0
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_WASM_EMU256

#define HWY_TARGET_STR "simd128"

//-----------------------------------------------------------------------------
// RVV
#elif HWY_TARGET == HWY_RVV

// RVV only requires lane alignment, not natural alignment of the entire vector,
// and the compiler already aligns builtin types, so nothing to do here.
#define HWY_ALIGN

// The spec requires VLEN <= 2^16 bits, so the limit is 2^16 bytes (LMUL=8).
#define HWY_MAX_BYTES 65536

// = HWY_MAX_BYTES divided by max LMUL=8 because MaxLanes includes the actual
// LMUL. This is the tightest possible upper bound.
#define HWY_LANES(T) (8192 / sizeof(T))

#define HWY_HAVE_SCALABLE 1
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

// float16 support is tied to the compiler-defined __riscv_zvfh macro.
#if defined(__riscv_zvfh)
#define HWY_HAVE_FLOAT16 1
#else
#define HWY_HAVE_FLOAT16 0
#endif

#define HWY_NAMESPACE N_RVV

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
// (rv64gcv is not a valid target)

//-----------------------------------------------------------------------------
// EMU128
#elif HWY_TARGET == HWY_EMU128

// 16-byte vectors.
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 1
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_EMU128

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

//-----------------------------------------------------------------------------
// SCALAR
#elif HWY_TARGET == HWY_SCALAR

// No alignment requirement; exactly one lane of up to 8 bytes.
#define HWY_ALIGN
#define HWY_MAX_BYTES 8
#define HWY_LANES(T) 1

#define HWY_HAVE_SCALABLE 0
#define HWY_HAVE_INTEGER64 1
#define HWY_HAVE_FLOAT16 1
#define HWY_HAVE_FLOAT64 1
#define HWY_MEM_OPS_MIGHT_FAULT 0
#define HWY_NATIVE_FMA 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_SCALAR

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

#else
// No branch matched: emit a diagnostic message (not a hard error); none of the
// per-target macros are defined in this case.
#pragma message("HWY_TARGET does not match any known target")
#endif  // HWY_TARGET

// Override this to 1 in asan/msan builds, which will still fault.
// This replaces whatever value the per-target section above chose.
#if HWY_IS_ASAN || HWY_IS_MSAN
#undef HWY_MEM_OPS_MIGHT_FAULT
#define HWY_MEM_OPS_MIGHT_FAULT 1
#endif

// Clang <9 requires this be invoked at file scope, before any namespace.
// When HWY_TARGET_STR is defined, this expands to
// HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR). In both variants the trailing
// static_assert makes the invocation require a terminating semicolon.
#undef HWY_BEFORE_NAMESPACE
#if defined(HWY_TARGET_STR)
#define HWY_BEFORE_NAMESPACE()        \
  HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_BEFORE_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif

// Clang <9 requires any namespaces be closed before this macro.
// When HWY_TARGET_STR is defined, this expands to HWY_POP_ATTRIBUTES. In both
// variants the trailing static_assert makes the invocation require a
// terminating semicolon.
#undef HWY_AFTER_NAMESPACE
#if defined(HWY_TARGET_STR)
#define HWY_AFTER_NAMESPACE() \
  HWY_POP_ATTRIBUTES          \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_AFTER_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif

// Function attribute selecting the current target: expands to
// __attribute__((target(HWY_TARGET_STR))) when HWY_TARGET_STR is defined and
// HWY_HAS_ATTRIBUTE(target) is nonzero, otherwise to nothing.
#undef HWY_ATTR
#if defined(HWY_TARGET_STR) && HWY_HAS_ATTRIBUTE(target)
#define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
#else
#define HWY_ATTR
#endif
detect_targets.h
Generated by
1.9.4