LLVM OpenMP* Runtime Library
kmp_os.h
1 /*
2  * kmp_os.h -- KPTS runtime header file.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_OS_H
14 #define KMP_OS_H
15 
16 #include "kmp_config.h"
17 #include <atomic>
18 #include <stdarg.h>
19 #include <stdlib.h>
20 
21 #define KMP_FTN_PLAIN 1
22 #define KMP_FTN_APPEND 2
23 #define KMP_FTN_UPPER 3
24 /*
25 #define KMP_FTN_PREPEND 4
26 #define KMP_FTN_UAPPEND 5
27 */
28 
29 #define KMP_PTR_SKIP (sizeof(void *))
30 
31 /* -------------------------- Compiler variations ------------------------ */
32 
33 #define KMP_OFF 0
34 #define KMP_ON 1
35 
36 #define KMP_MEM_CONS_VOLATILE 0
37 #define KMP_MEM_CONS_FENCE 1
38 
39 #ifndef KMP_MEM_CONS_MODEL
40 #define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE
41 #endif
42 
43 #ifndef __has_cpp_attribute
44 #define __has_cpp_attribute(x) 0
45 #endif
46 
47 #ifndef __has_attribute
48 #define __has_attribute(x) 0
49 #endif
50 
51 /* ------------------------- Compiler recognition ---------------------- */
52 #define KMP_COMPILER_ICC 0
53 #define KMP_COMPILER_GCC 0
54 #define KMP_COMPILER_CLANG 0
55 #define KMP_COMPILER_MSVC 0
56 #define KMP_COMPILER_ICX 0
57 
58 #if __INTEL_CLANG_COMPILER
59 #undef KMP_COMPILER_ICX
60 #define KMP_COMPILER_ICX 1
61 #elif defined(__INTEL_COMPILER)
62 #undef KMP_COMPILER_ICC
63 #define KMP_COMPILER_ICC 1
64 #elif defined(__clang__)
65 #undef KMP_COMPILER_CLANG
66 #define KMP_COMPILER_CLANG 1
67 #elif defined(__GNUC__)
68 #undef KMP_COMPILER_GCC
69 #define KMP_COMPILER_GCC 1
70 #elif defined(_MSC_VER)
71 #undef KMP_COMPILER_MSVC
72 #define KMP_COMPILER_MSVC 1
73 #else
74 #error Unknown compiler
75 #endif
76 
77 #if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD)
78 #define KMP_AFFINITY_SUPPORTED 1
79 #if KMP_OS_WINDOWS && KMP_ARCH_X86_64
80 #define KMP_GROUP_AFFINITY 1
81 #else
82 #define KMP_GROUP_AFFINITY 0
83 #endif
84 #else
85 #define KMP_AFFINITY_SUPPORTED 0
86 #define KMP_GROUP_AFFINITY 0
87 #endif
88 
89 /* Check for quad-precision extension. */
90 #define KMP_HAVE_QUAD 0
91 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
92 #if KMP_COMPILER_ICC || KMP_COMPILER_ICX
93 /* _Quad is already defined for icc */
94 #undef KMP_HAVE_QUAD
95 #define KMP_HAVE_QUAD 1
96 #elif KMP_COMPILER_CLANG
97 /* Clang doesn't support a software-implemented
98  128-bit extended precision type yet */
99 typedef long double _Quad;
100 #elif KMP_COMPILER_GCC
101 /* GCC on NetBSD lacks __multc3/__divtc3 builtins needed for quad */
102 #if !KMP_OS_NETBSD
103 typedef __float128 _Quad;
104 #undef KMP_HAVE_QUAD
105 #define KMP_HAVE_QUAD 1
106 #endif
107 #elif KMP_COMPILER_MSVC
108 typedef long double _Quad;
109 #endif
110 #else
111 #if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC
112 typedef long double _Quad;
113 #undef KMP_HAVE_QUAD
114 #define KMP_HAVE_QUAD 1
115 #endif
116 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
117 
118 #define KMP_USE_X87CONTROL 0
119 #if KMP_OS_WINDOWS
120 #define KMP_END_OF_LINE "\r\n"
121 typedef char kmp_int8;
122 typedef unsigned char kmp_uint8;
123 typedef short kmp_int16;
124 typedef unsigned short kmp_uint16;
125 typedef int kmp_int32;
126 typedef unsigned int kmp_uint32;
127 #define KMP_INT32_SPEC "d"
128 #define KMP_UINT32_SPEC "u"
129 #ifndef KMP_STRUCT64
130 typedef __int64 kmp_int64;
131 typedef unsigned __int64 kmp_uint64;
132 #define KMP_INT64_SPEC "I64d"
133 #define KMP_UINT64_SPEC "I64u"
134 #else
135 struct kmp_struct64 {
136  kmp_int32 a, b;
137 };
138 typedef struct kmp_struct64 kmp_int64;
139 typedef struct kmp_struct64 kmp_uint64;
140 /* Not sure what to use for KMP_[U]INT64_SPEC here */
141 #endif
142 #if KMP_ARCH_X86 && KMP_MSVC_COMPAT
143 #undef KMP_USE_X87CONTROL
144 #define KMP_USE_X87CONTROL 1
145 #endif
146 #if KMP_ARCH_X86_64 || KMP_ARCH_AARCH64
147 #define KMP_INTPTR 1
148 typedef __int64 kmp_intptr_t;
149 typedef unsigned __int64 kmp_uintptr_t;
150 #define KMP_INTPTR_SPEC "I64d"
151 #define KMP_UINTPTR_SPEC "I64u"
152 #endif
153 #endif /* KMP_OS_WINDOWS */
154 
155 #if KMP_OS_UNIX
156 #define KMP_END_OF_LINE "\n"
157 typedef char kmp_int8;
158 typedef unsigned char kmp_uint8;
159 typedef short kmp_int16;
160 typedef unsigned short kmp_uint16;
161 typedef int kmp_int32;
162 typedef unsigned int kmp_uint32;
163 typedef long long kmp_int64;
164 typedef unsigned long long kmp_uint64;
165 #define KMP_INT32_SPEC "d"
166 #define KMP_UINT32_SPEC "u"
167 #define KMP_INT64_SPEC "lld"
168 #define KMP_UINT64_SPEC "llu"
169 #endif /* KMP_OS_UNIX */
170 
171 #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS
172 #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
173 #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
174  KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
175 #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
176 #else
177 #error "Can't determine size_t printf format specifier."
178 #endif
179 
180 #if KMP_ARCH_X86
181 #define KMP_SIZE_T_MAX (0xFFFFFFFF)
182 #else
183 #define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF)
184 #endif
185 
186 typedef size_t kmp_size_t;
187 typedef float kmp_real32;
188 typedef double kmp_real64;
189 
190 #ifndef KMP_INTPTR
191 #define KMP_INTPTR 1
192 typedef long kmp_intptr_t;
193 typedef unsigned long kmp_uintptr_t;
194 #define KMP_INTPTR_SPEC "ld"
195 #define KMP_UINTPTR_SPEC "lu"
196 #endif
197 
198 #ifdef BUILD_I8
199 typedef kmp_int64 kmp_int;
200 typedef kmp_uint64 kmp_uint;
201 #else
202 typedef kmp_int32 kmp_int;
203 typedef kmp_uint32 kmp_uint;
204 #endif /* BUILD_I8 */
205 #define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF)
206 #define KMP_INT_MIN ((kmp_int32)0x80000000)
207 
208 // stdarg handling
209 #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && \
210  (KMP_OS_FREEBSD || KMP_OS_LINUX)
211 typedef va_list *kmp_va_list;
212 #define kmp_va_deref(ap) (*(ap))
213 #define kmp_va_addr_of(ap) (&(ap))
214 #else
215 typedef va_list kmp_va_list;
216 #define kmp_va_deref(ap) (ap)
217 #define kmp_va_addr_of(ap) (ap)
218 #endif
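// Minimal usage sketch (helper name hypothetical): on targets where va_list is
// an array type, kmp_va_addr_of() takes its address so the argument pack can
// be handed to another routine and re-expanded there with kmp_va_deref():
//
//   void __kmp_example_consume(kmp_va_list args, int argc) {
//     for (int i = 0; i < argc; ++i)
//       (void)va_arg(kmp_va_deref(args), void *); // reads the original va_list
//   }
//   // caller: va_start(ap, last); __kmp_example_consume(kmp_va_addr_of(ap), n);
//   //         va_end(ap);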
219 
220 #ifdef __cplusplus
221 // macros to cast out qualifiers and to re-interpret types
222 #define CCAST(type, var) const_cast<type>(var)
223 #define RCAST(type, var) reinterpret_cast<type>(var)
224 //-------------------------------------------------------------------------
225 // Traits template used to obtain the debug-print format specification
226 // (d, u, lld, llu) and the signed/unsigned/floating flavors of a type
227 template <typename T> struct traits_t {};
228 // int
229 template <> struct traits_t<signed int> {
230  typedef signed int signed_t;
231  typedef unsigned int unsigned_t;
232  typedef double floating_t;
233  static char const *spec;
234  static const signed_t max_value = 0x7fffffff;
235  static const signed_t min_value = 0x80000000;
236  static const int type_size = sizeof(signed_t);
237 };
238 // unsigned int
239 template <> struct traits_t<unsigned int> {
240  typedef signed int signed_t;
241  typedef unsigned int unsigned_t;
242  typedef double floating_t;
243  static char const *spec;
244  static const unsigned_t max_value = 0xffffffff;
245  static const unsigned_t min_value = 0x00000000;
246  static const int type_size = sizeof(unsigned_t);
247 };
248 // long
249 template <> struct traits_t<signed long> {
250  typedef signed long signed_t;
251  typedef unsigned long unsigned_t;
252  typedef long double floating_t;
253  static char const *spec;
254  static const int type_size = sizeof(signed_t);
255 };
256 // long long
257 template <> struct traits_t<signed long long> {
258  typedef signed long long signed_t;
259  typedef unsigned long long unsigned_t;
260  typedef long double floating_t;
261  static char const *spec;
262  static const signed_t max_value = 0x7fffffffffffffffLL;
263  static const signed_t min_value = 0x8000000000000000LL;
264  static const int type_size = sizeof(signed_t);
265 };
266 // unsigned long long
267 template <> struct traits_t<unsigned long long> {
268  typedef signed long long signed_t;
269  typedef unsigned long long unsigned_t;
270  typedef long double floating_t;
271  static char const *spec;
272  static const unsigned_t max_value = 0xffffffffffffffffLL;
273  static const unsigned_t min_value = 0x0000000000000000LL;
274  static const int type_size = sizeof(unsigned_t);
275 };
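// Usage sketch (the printing helper below is illustrative, not part of this
// header): the per-type spec string supplies the printf length modifier, so a
// single template can print any flavor:
//
//   template <typename T> void __kmp_example_print(T value) {
//     char fmt[16];
//     snprintf(fmt, sizeof(fmt), "%%%s\n", traits_t<T>::spec); // "%d\n", "%lld\n", ...
//     printf(fmt, value);
//   }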
276 //-------------------------------------------------------------------------
277 #else
278 #define CCAST(type, var) (type)(var)
279 #define RCAST(type, var) (type)(var)
280 #endif // __cplusplus
281 
282 #define KMP_EXPORT extern /* export declaration in guide libraries */
283 
284 #if __GNUC__ >= 4 && !defined(__MINGW32__)
285 #define __forceinline __inline
286 #endif
287 
288 /* Check if the OS/arch can support user-level mwait */
289 // All mwait code tests for UMWAIT first, so it should only fall back to ring3
290 // MWAIT for KNL.
291 #define KMP_HAVE_MWAIT \
292  ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) && \
293  !KMP_MIC2)
294 #define KMP_HAVE_UMWAIT \
295  ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) && \
296  !KMP_MIC)
297 
298 #if KMP_OS_WINDOWS
299 #include <windows.h>
300 
301 static inline int KMP_GET_PAGE_SIZE(void) {
302  SYSTEM_INFO si;
303  GetSystemInfo(&si);
304  return si.dwPageSize;
305 }
306 #else
307 #define KMP_GET_PAGE_SIZE() getpagesize()
308 #endif
309 
310 #define PAGE_ALIGNED(_addr) \
311  (!((size_t)_addr & (size_t)(KMP_GET_PAGE_SIZE() - 1)))
312 #define ALIGN_TO_PAGE(x) \
313  (void *)(((size_t)(x)) & ~((size_t)(KMP_GET_PAGE_SIZE() - 1)))
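// Worked example (assuming the OS reports a 4096-byte page at run time):
//   ALIGN_TO_PAGE((void *)0x12345) == (void *)0x12000
//   PAGE_ALIGNED((void *)0x12000)  != 0   // aligned
//   PAGE_ALIGNED((void *)0x12345)  == 0   // not aligned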
314 
315 /* ---------- Support for cache alignment, padding, etc. ----------------*/
316 
317 #ifdef __cplusplus
318 extern "C" {
319 #endif // __cplusplus
320 
321 #define INTERNODE_CACHE_LINE 4096 /* for multi-node systems */
322 
323 /* Define the default size of the cache line */
324 #ifndef CACHE_LINE
325 #define CACHE_LINE 128 /* cache line size in bytes */
326 #else
327 #if (CACHE_LINE < 64) && !defined(KMP_OS_DARWIN)
328 // 2006-02-13: This produces too many warnings on OS X*. Disable for now
329 #warning CACHE_LINE is too small.
330 #endif
331 #endif /* CACHE_LINE */
332 
333 #define KMP_CACHE_PREFETCH(ADDR) /* nothing */
334 
335 // Define attribute that indicates that the fall through from the previous
336 // case label is intentional and should not be diagnosed by a compiler
337 // Code from libcxx/include/__config
338 // Use a function-like macro to imply that it must be followed by a semicolon
339 #if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
340 #define KMP_FALLTHROUGH() [[fallthrough]]
341 #elif __has_cpp_attribute(clang::fallthrough)
342 #define KMP_FALLTHROUGH() [[clang::fallthrough]]
343 #elif __has_attribute(fallthrough) || __GNUC__ >= 7
344 #define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
345 #else
346 #define KMP_FALLTHROUGH() ((void)0)
347 #endif
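// Usage sketch (case bodies hypothetical): place it at the end of a case that
// intentionally falls into the next label so the compiler does not warn:
//
//   switch (kind) {
//   case 1:
//     prepare();
//     KMP_FALLTHROUGH();
//   case 2:
//     finish();
//     break;
//   }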
348 
349 #if KMP_HAVE_ATTRIBUTE_WAITPKG
350 #define KMP_ATTRIBUTE_TARGET_WAITPKG __attribute__((target("waitpkg")))
351 #else
352 #define KMP_ATTRIBUTE_TARGET_WAITPKG /* Nothing */
353 #endif
354 
355 #if KMP_HAVE_ATTRIBUTE_RTM
356 #define KMP_ATTRIBUTE_TARGET_RTM __attribute__((target("rtm")))
357 #else
358 #define KMP_ATTRIBUTE_TARGET_RTM /* Nothing */
359 #endif
360 
361 // Define attribute that indicates a function does not return
362 #if __cplusplus >= 201103L
363 #define KMP_NORETURN [[noreturn]]
364 #elif KMP_OS_WINDOWS
365 #define KMP_NORETURN __declspec(noreturn)
366 #else
367 #define KMP_NORETURN __attribute__((noreturn))
368 #endif
369 
370 #if KMP_OS_WINDOWS && KMP_MSVC_COMPAT
371 #define KMP_ALIGN(bytes) __declspec(align(bytes))
372 #define KMP_THREAD_LOCAL __declspec(thread)
373 #define KMP_ALIAS /* Nothing */
374 #else
375 #define KMP_ALIGN(bytes) __attribute__((aligned(bytes)))
376 #define KMP_THREAD_LOCAL __thread
377 #define KMP_ALIAS(alias_of) __attribute__((alias(alias_of)))
378 #endif
379 
380 #if KMP_HAVE_WEAK_ATTRIBUTE && !KMP_DYNAMIC_LIB
381 #define KMP_WEAK_ATTRIBUTE_EXTERNAL __attribute__((weak))
382 #else
383 #define KMP_WEAK_ATTRIBUTE_EXTERNAL /* Nothing */
384 #endif
385 
386 #if KMP_HAVE_WEAK_ATTRIBUTE
387 #define KMP_WEAK_ATTRIBUTE_INTERNAL __attribute__((weak))
388 #else
389 #define KMP_WEAK_ATTRIBUTE_INTERNAL /* Nothing */
390 #endif
391 
392 // Define KMP_VERSION_SYMBOL and KMP_EXPAND_NAME
393 #ifndef KMP_STR
394 #define KMP_STR(x) _KMP_STR(x)
395 #define _KMP_STR(x) #x
396 #endif
397 
398 #ifdef KMP_USE_VERSION_SYMBOLS
399 // If using versioned symbols, KMP_EXPAND_NAME prepends
400 // __kmp_api_ to the real API name
401 #define KMP_EXPAND_NAME(api_name) _KMP_EXPAND_NAME(api_name)
402 #define _KMP_EXPAND_NAME(api_name) __kmp_api_##api_name
403 #define KMP_VERSION_SYMBOL(api_name, ver_num, ver_str) \
404  _KMP_VERSION_SYMBOL(api_name, ver_num, ver_str, "VERSION")
405 #define _KMP_VERSION_SYMBOL(api_name, ver_num, ver_str, default_ver) \
406  __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver_num##_alias \
407  __attribute__((alias(KMP_STR(__kmp_api_##api_name)))); \
408  __asm__( \
409  ".symver " KMP_STR(__kmp_api_##api_name##_##ver_num##_alias) "," KMP_STR( \
410  api_name) "@" ver_str "\n\t"); \
411  __asm__(".symver " KMP_STR(__kmp_api_##api_name) "," KMP_STR( \
412  api_name) "@@" default_ver "\n\t")
413 
414 #define KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, ver_str) \
415  _KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, ver_str, "VERSION")
416 #define _KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, ver_str, \
417  default_ver) \
418  __typeof__(__kmp_api_##apic_name) __kmp_api_##apic_name##_##ver_num##_alias \
419  __attribute__((alias(KMP_STR(__kmp_api_##apic_name)))); \
420  __asm__(".symver " KMP_STR(__kmp_api_##apic_name) "," KMP_STR( \
421  apic_name) "@@" default_ver "\n\t"); \
422  __asm__( \
423  ".symver " KMP_STR(__kmp_api_##apic_name##_##ver_num##_alias) "," KMP_STR( \
424  api_name) "@" ver_str "\n\t")
425 
426 #else // KMP_USE_VERSION_SYMBOLS
427 #define KMP_EXPAND_NAME(api_name) api_name
428 #define KMP_VERSION_SYMBOL(api_name, ver_num, ver_str) /* Nothing */
429 #define KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, \
430  ver_str) /* Nothing */
431 #endif // KMP_USE_VERSION_SYMBOLS
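// Expansion sketch (assuming KMP_USE_VERSION_SYMBOLS; "omp_get_wtime" and
// "OMP_3.0" are illustrative):
//   KMP_VERSION_SYMBOL(omp_get_wtime, 30, "OMP_3.0");
// declares __kmp_api_omp_get_wtime_30_alias as an alias of
// __kmp_api_omp_get_wtime and emits .symver directives binding
// omp_get_wtime@OMP_3.0 to the alias and omp_get_wtime@@VERSION to the
// current implementation, so binaries linked against the old version keep
// resolving to the versioned entry point.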
432 
433 /* Temporary note: if performance testing of this passes, we can remove
434  all references to KMP_DO_ALIGN and replace with KMP_ALIGN. */
435 #define KMP_DO_ALIGN(bytes) KMP_ALIGN(bytes)
436 #define KMP_ALIGN_CACHE KMP_ALIGN(CACHE_LINE)
437 #define KMP_ALIGN_CACHE_INTERNODE KMP_ALIGN(INTERNODE_CACHE_LINE)
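// Usage sketch (type name hypothetical): KMP_ALIGN_CACHE pads hot shared data
// to a cache-line boundary to limit false sharing:
//
//   typedef struct KMP_ALIGN_CACHE kmp_example_counter {
//     volatile kmp_int32 value;
//   } kmp_example_counter_t;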
438 
439 /* General purpose fence types for memory operations */
440 enum kmp_mem_fence_type {
441  kmp_no_fence, /* No memory fence */
442  kmp_acquire_fence, /* Acquire (read) memory fence */
443  kmp_release_fence, /* Release (write) memory fence */
444  kmp_full_fence /* Full (read+write) memory fence */
445 };
446 
447 // Synchronization primitives
448 
449 #if KMP_ASM_INTRINS && KMP_OS_WINDOWS
450 
451 #if KMP_MSVC_COMPAT && !KMP_COMPILER_CLANG
452 #pragma intrinsic(InterlockedExchangeAdd)
453 #pragma intrinsic(InterlockedCompareExchange)
454 #pragma intrinsic(InterlockedExchange)
455 #if !(KMP_COMPILER_ICX && KMP_32_BIT_ARCH)
456 #pragma intrinsic(InterlockedExchange64)
457 #endif
458 #endif
459 
460 // Using InterlockedIncrement / InterlockedDecrement causes a library loading
461 // ordering problem, so we use InterlockedExchangeAdd instead.
462 #define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd((volatile long *)(p), 1)
463 #define KMP_TEST_THEN_INC_ACQ32(p) \
464  InterlockedExchangeAdd((volatile long *)(p), 1)
465 #define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd((volatile long *)(p), 4)
466 #define KMP_TEST_THEN_ADD4_ACQ32(p) \
467  InterlockedExchangeAdd((volatile long *)(p), 4)
468 #define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd((volatile long *)(p), -1)
469 #define KMP_TEST_THEN_DEC_ACQ32(p) \
470  InterlockedExchangeAdd((volatile long *)(p), -1)
471 #define KMP_TEST_THEN_ADD32(p, v) \
472  InterlockedExchangeAdd((volatile long *)(p), (v))
473 
474 #define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \
475  InterlockedCompareExchange((volatile long *)(p), (long)(sv), (long)(cv))
476 
477 #define KMP_XCHG_FIXED32(p, v) \
478  InterlockedExchange((volatile long *)(p), (long)(v))
479 #define KMP_XCHG_FIXED64(p, v) \
480  InterlockedExchange64((volatile kmp_int64 *)(p), (kmp_int64)(v))
481 
482 inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) {
483  kmp_int32 tmp = InterlockedExchange((volatile long *)p, *(long *)&v);
484  return *(kmp_real32 *)&tmp;
485 }
486 
487 #define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8((p), (v))
488 #define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8((p), (v))
489 #define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32((p), (v))
490 #define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32((p), (v))
491 #define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64((p), (v))
492 #define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64((p), (v))
493 
494 extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v);
495 extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v);
496 extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v);
497 extern kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v);
498 extern kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 v);
499 extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v);
500 extern kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 v);
501 extern kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 v);
502 
503 #if KMP_ARCH_AARCH64 && KMP_COMPILER_MSVC && !KMP_COMPILER_CLANG
504 #define KMP_TEST_THEN_INC64(p) _InterlockedExchangeAdd64((p), 1LL)
505 #define KMP_TEST_THEN_INC_ACQ64(p) _InterlockedExchangeAdd64_acq((p), 1LL)
506 #define KMP_TEST_THEN_ADD4_64(p) _InterlockedExchangeAdd64((p), 4LL)
507 // #define KMP_TEST_THEN_ADD4_ACQ64(p) _InterlockedExchangeAdd64_acq((p), 4LL)
508 // #define KMP_TEST_THEN_DEC64(p) _InterlockedExchangeAdd64((p), -1LL)
509 // #define KMP_TEST_THEN_DEC_ACQ64(p) _InterlockedExchangeAdd64_acq((p), -1LL)
510 // #define KMP_TEST_THEN_ADD8(p, v) _InterlockedExchangeAdd8((p), (v))
511 #define KMP_TEST_THEN_ADD64(p, v) _InterlockedExchangeAdd64((p), (v))
512 
513 #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \
514  __kmp_compare_and_store_acq8((p), (cv), (sv))
515 #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \
516  __kmp_compare_and_store_rel8((p), (cv), (sv))
517 #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \
518  __kmp_compare_and_store_acq16((p), (cv), (sv))
519 /*
520 #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \
521  __kmp_compare_and_store_rel16((p), (cv), (sv))
522 */
523 #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \
524  __kmp_compare_and_store_acq32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
525  (kmp_int32)(sv))
526 #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \
527  __kmp_compare_and_store_rel32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
528  (kmp_int32)(sv))
529 #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \
530  __kmp_compare_and_store_acq64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
531  (kmp_int64)(sv))
532 #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \
533  __kmp_compare_and_store_rel64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
534  (kmp_int64)(sv))
535 #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \
536  __kmp_compare_and_store_ptr((void *volatile *)(p), (void *)(cv), (void *)(sv))
537 
538 // KMP_COMPARE_AND_STORE expects this order: pointer, compare, exchange
539 // _InterlockedCompareExchange expects this order: pointer, exchange, compare
540 // KMP_COMPARE_AND_STORE also returns a bool indicating a successful write. A
541 // write is successful if the return value of _InterlockedCompareExchange is the
542 // same as the compare value.
543 inline kmp_int8 __kmp_compare_and_store_acq8(volatile kmp_int8 *p, kmp_int8 cv,
544  kmp_int8 sv) {
545  return _InterlockedCompareExchange8_acq(p, sv, cv) == cv;
546 }
547 
548 inline kmp_int8 __kmp_compare_and_store_rel8(volatile kmp_int8 *p, kmp_int8 cv,
549  kmp_int8 sv) {
550  return _InterlockedCompareExchange8_rel(p, sv, cv) == cv;
551 }
552 
553 inline kmp_int16 __kmp_compare_and_store_acq16(volatile kmp_int16 *p,
554  kmp_int16 cv, kmp_int16 sv) {
555  return _InterlockedCompareExchange16_acq(p, sv, cv) == cv;
556 }
557 
558 inline kmp_int16 __kmp_compare_and_store_rel16(volatile kmp_int16 *p,
559  kmp_int16 cv, kmp_int16 sv) {
560  return _InterlockedCompareExchange16_rel(p, sv, cv) == cv;
561 }
562 
563 inline kmp_int32 __kmp_compare_and_store_acq32(volatile kmp_int32 *p,
564  kmp_int32 cv, kmp_int32 sv) {
565  return _InterlockedCompareExchange_acq((volatile long *)p, sv, cv) == cv;
566 }
567 
568 inline kmp_int32 __kmp_compare_and_store_rel32(volatile kmp_int32 *p,
569  kmp_int32 cv, kmp_int32 sv) {
570  return _InterlockedCompareExchange_rel((volatile long *)p, sv, cv) == cv;
571 }
572 
573 inline kmp_int32 __kmp_compare_and_store_acq64(volatile kmp_int64 *p,
574  kmp_int64 cv, kmp_int64 sv) {
575  return _InterlockedCompareExchange64_acq(p, sv, cv) == cv;
576 }
577 
578 inline kmp_int32 __kmp_compare_and_store_rel64(volatile kmp_int64 *p,
579  kmp_int64 cv, kmp_int64 sv) {
580  return _InterlockedCompareExchange64_rel(p, sv, cv) == cv;
581 }
582 
583 inline kmp_int32 __kmp_compare_and_store_ptr(void *volatile *p, void *cv,
584  void *sv) {
585  return _InterlockedCompareExchangePointer(p, sv, cv) == cv;
586 }
587 
588 // The _RET versions return the value instead of a bool
589 /*
590 #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \
591  _InterlockedCompareExchange8((p), (sv), (cv))
592 #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \
593  _InterlockedCompareExchange16((p), (sv), (cv))
594 */
595 #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \
596  _InterlockedCompareExchange64((volatile kmp_int64 *)(p), (kmp_int64)(sv), \
597  (kmp_int64)(cv))
598 
599 /*
600 #define KMP_XCHG_FIXED8(p, v) \
601  _InterlockedExchange8((volatile kmp_int8 *)(p), (kmp_int8)(v));
602 */
603 // #define KMP_XCHG_FIXED16(p, v) _InterlockedExchange16((p), (v));
604 // #define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v)));
605 
606 // inline kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v) {
607 // kmp_int64 tmp = _InterlockedExchange64((volatile kmp_int64 *)p, *(kmp_int64
608 // *)&v); return *(kmp_real64 *)&tmp;
609 // }
610 
611 #else // !(KMP_ARCH_AARCH64 && KMP_COMPILER_MSVC && !KMP_COMPILER_CLANG)
612 
613 // Routines that we still need to implement in assembly.
614 extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v);
615 
616 extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv,
617  kmp_int8 sv);
618 extern kmp_int16 __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv,
619  kmp_int16 sv);
620 extern kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv,
621  kmp_int32 sv);
622 extern kmp_int32 __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv,
623  kmp_int64 sv);
624 extern kmp_int8 __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv,
625  kmp_int8 sv);
626 extern kmp_int16 __kmp_compare_and_store_ret16(volatile kmp_int16 *p,
627  kmp_int16 cv, kmp_int16 sv);
628 extern kmp_int32 __kmp_compare_and_store_ret32(volatile kmp_int32 *p,
629  kmp_int32 cv, kmp_int32 sv);
630 extern kmp_int64 __kmp_compare_and_store_ret64(volatile kmp_int64 *p,
631  kmp_int64 cv, kmp_int64 sv);
632 
633 extern kmp_int8 __kmp_xchg_fixed8(volatile kmp_int8 *p, kmp_int8 v);
634 extern kmp_int16 __kmp_xchg_fixed16(volatile kmp_int16 *p, kmp_int16 v);
635 extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v);
636 extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v);
637 extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v);
638 extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
639 
640 //#define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32((p), 1)
641 //#define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32((p), 1)
642 #define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64((p), 1LL)
643 #define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64((p), 1LL)
644 //#define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32((p), 4)
645 //#define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32((p), 4)
646 #define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64((p), 4LL)
647 #define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64((p), 4LL)
648 //#define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32((p), -1)
649 //#define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32((p), -1)
650 #define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64((p), -1LL)
651 #define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64((p), -1LL)
652 //#define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32((p), (v))
653 #define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8((p), (v))
654 #define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64((p), (v))
655 
656 
657 #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \
658  __kmp_compare_and_store8((p), (cv), (sv))
659 #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \
660  __kmp_compare_and_store8((p), (cv), (sv))
661 #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \
662  __kmp_compare_and_store16((p), (cv), (sv))
663 #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \
664  __kmp_compare_and_store16((p), (cv), (sv))
665 #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \
666  __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
667  (kmp_int32)(sv))
668 #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \
669  __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
670  (kmp_int32)(sv))
671 #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \
672  __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
673  (kmp_int64)(sv))
674 #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \
675  __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
676  (kmp_int64)(sv))
677 
678 #if KMP_ARCH_X86
679 #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \
680  __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
681  (kmp_int32)(sv))
682 #else /* 64 bit pointers */
683 #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \
684  __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
685  (kmp_int64)(sv))
686 #endif /* KMP_ARCH_X86 */
687 
688 #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \
689  __kmp_compare_and_store_ret8((p), (cv), (sv))
690 #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \
691  __kmp_compare_and_store_ret16((p), (cv), (sv))
692 #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \
693  __kmp_compare_and_store_ret64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
694  (kmp_int64)(sv))
695 
696 #define KMP_XCHG_FIXED8(p, v) \
697  __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v));
698 #define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16((p), (v));
699 //#define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32((p), (v));
700 //#define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64((p), (v));
701 //#define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32((p), (v));
702 #define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v));
703 #endif
704 
705 #elif (KMP_ASM_INTRINS && KMP_OS_UNIX) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64)
706 
707 /* cast p to the correct type so that the proper intrinsic will be used */
708 #define KMP_TEST_THEN_INC32(p) \
709  __sync_fetch_and_add((volatile kmp_int32 *)(p), 1)
710 #define KMP_TEST_THEN_INC_ACQ32(p) \
711  __sync_fetch_and_add((volatile kmp_int32 *)(p), 1)
712 #if KMP_ARCH_MIPS
713 #define KMP_TEST_THEN_INC64(p) \
714  __atomic_fetch_add((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST)
715 #define KMP_TEST_THEN_INC_ACQ64(p) \
716  __atomic_fetch_add((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST)
717 #else
718 #define KMP_TEST_THEN_INC64(p) \
719  __sync_fetch_and_add((volatile kmp_int64 *)(p), 1LL)
720 #define KMP_TEST_THEN_INC_ACQ64(p) \
721  __sync_fetch_and_add((volatile kmp_int64 *)(p), 1LL)
722 #endif
723 #define KMP_TEST_THEN_ADD4_32(p) \
724  __sync_fetch_and_add((volatile kmp_int32 *)(p), 4)
725 #define KMP_TEST_THEN_ADD4_ACQ32(p) \
726  __sync_fetch_and_add((volatile kmp_int32 *)(p), 4)
727 #if KMP_ARCH_MIPS
728 #define KMP_TEST_THEN_ADD4_64(p) \
729  __atomic_fetch_add((volatile kmp_int64 *)(p), 4LL, __ATOMIC_SEQ_CST)
730 #define KMP_TEST_THEN_ADD4_ACQ64(p) \
731  __atomic_fetch_add((volatile kmp_int64 *)(p), 4LL, __ATOMIC_SEQ_CST)
732 #define KMP_TEST_THEN_DEC64(p) \
733  __atomic_fetch_sub((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST)
734 #define KMP_TEST_THEN_DEC_ACQ64(p) \
735  __atomic_fetch_sub((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST)
736 #else
737 #define KMP_TEST_THEN_ADD4_64(p) \
738  __sync_fetch_and_add((volatile kmp_int64 *)(p), 4LL)
739 #define KMP_TEST_THEN_ADD4_ACQ64(p) \
740  __sync_fetch_and_add((volatile kmp_int64 *)(p), 4LL)
741 #define KMP_TEST_THEN_DEC64(p) \
742  __sync_fetch_and_sub((volatile kmp_int64 *)(p), 1LL)
743 #define KMP_TEST_THEN_DEC_ACQ64(p) \
744  __sync_fetch_and_sub((volatile kmp_int64 *)(p), 1LL)
745 #endif
746 #define KMP_TEST_THEN_DEC32(p) \
747  __sync_fetch_and_sub((volatile kmp_int32 *)(p), 1)
748 #define KMP_TEST_THEN_DEC_ACQ32(p) \
749  __sync_fetch_and_sub((volatile kmp_int32 *)(p), 1)
750 #define KMP_TEST_THEN_ADD8(p, v) \
751  __sync_fetch_and_add((volatile kmp_int8 *)(p), (kmp_int8)(v))
752 #define KMP_TEST_THEN_ADD32(p, v) \
753  __sync_fetch_and_add((volatile kmp_int32 *)(p), (kmp_int32)(v))
754 #if KMP_ARCH_MIPS
755 #define KMP_TEST_THEN_ADD64(p, v) \
756  __atomic_fetch_add((volatile kmp_uint64 *)(p), (kmp_uint64)(v), \
757  __ATOMIC_SEQ_CST)
758 #else
759 #define KMP_TEST_THEN_ADD64(p, v) \
760  __sync_fetch_and_add((volatile kmp_int64 *)(p), (kmp_int64)(v))
761 #endif
762 
763 #define KMP_TEST_THEN_OR8(p, v) \
764  __sync_fetch_and_or((volatile kmp_int8 *)(p), (kmp_int8)(v))
765 #define KMP_TEST_THEN_AND8(p, v) \
766  __sync_fetch_and_and((volatile kmp_int8 *)(p), (kmp_int8)(v))
767 #define KMP_TEST_THEN_OR32(p, v) \
768  __sync_fetch_and_or((volatile kmp_uint32 *)(p), (kmp_uint32)(v))
769 #define KMP_TEST_THEN_AND32(p, v) \
770  __sync_fetch_and_and((volatile kmp_uint32 *)(p), (kmp_uint32)(v))
771 #if KMP_ARCH_MIPS
772 #define KMP_TEST_THEN_OR64(p, v) \
773  __atomic_fetch_or((volatile kmp_uint64 *)(p), (kmp_uint64)(v), \
774  __ATOMIC_SEQ_CST)
775 #define KMP_TEST_THEN_AND64(p, v) \
776  __atomic_fetch_and((volatile kmp_uint64 *)(p), (kmp_uint64)(v), \
777  __ATOMIC_SEQ_CST)
778 #else
779 #define KMP_TEST_THEN_OR64(p, v) \
780  __sync_fetch_and_or((volatile kmp_uint64 *)(p), (kmp_uint64)(v))
781 #define KMP_TEST_THEN_AND64(p, v) \
782  __sync_fetch_and_and((volatile kmp_uint64 *)(p), (kmp_uint64)(v))
783 #endif
784 
785 #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \
786  __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \
787  (kmp_uint8)(sv))
788 #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \
789  __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \
790  (kmp_uint8)(sv))
791 #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \
792  __sync_bool_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \
793  (kmp_uint16)(sv))
794 #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \
795  __sync_bool_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \
796  (kmp_uint16)(sv))
797 #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \
798  __sync_bool_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \
799  (kmp_uint32)(sv))
800 #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \
801  __sync_bool_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \
802  (kmp_uint32)(sv))
803 #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \
804  __sync_bool_compare_and_swap((void *volatile *)(p), (void *)(cv), \
805  (void *)(sv))
806 
807 #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \
808  __sync_val_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \
809  (kmp_uint8)(sv))
810 #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \
811  __sync_val_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \
812  (kmp_uint16)(sv))
813 #define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \
814  __sync_val_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \
815  (kmp_uint32)(sv))
816 #if KMP_ARCH_MIPS
817 static inline bool mips_sync_bool_compare_and_swap(volatile kmp_uint64 *p,
818  kmp_uint64 cv,
819  kmp_uint64 sv) {
820  return __atomic_compare_exchange(p, &cv, &sv, false, __ATOMIC_SEQ_CST,
821  __ATOMIC_SEQ_CST);
822 }
823 static inline bool mips_sync_val_compare_and_swap(volatile kmp_uint64 *p,
824  kmp_uint64 cv,
825  kmp_uint64 sv) {
826  __atomic_compare_exchange(p, &cv, &sv, false, __ATOMIC_SEQ_CST,
827  __ATOMIC_SEQ_CST);
828  return cv;
829 }
830 #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \
831  mips_sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), \
832  (kmp_uint64)(cv), (kmp_uint64)(sv))
833 #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \
834  mips_sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), \
835  (kmp_uint64)(cv), (kmp_uint64)(sv))
836 #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \
837  mips_sync_val_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \
838  (kmp_uint64)(sv))
839 #else
840 #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \
841  __sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \
842  (kmp_uint64)(sv))
843 #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \
844  __sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \
845  (kmp_uint64)(sv))
846 #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \
847  __sync_val_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \
848  (kmp_uint64)(sv))
849 #endif
850 
851 #if KMP_OS_DARWIN && defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1800
852 #define KMP_XCHG_FIXED8(p, v) \
853  __atomic_exchange_1((volatile kmp_uint8 *)(p), (kmp_uint8)(v), \
854  __ATOMIC_SEQ_CST)
855 #else
856 #define KMP_XCHG_FIXED8(p, v) \
857  __sync_lock_test_and_set((volatile kmp_uint8 *)(p), (kmp_uint8)(v))
858 #endif
859 #define KMP_XCHG_FIXED16(p, v) \
860  __sync_lock_test_and_set((volatile kmp_uint16 *)(p), (kmp_uint16)(v))
861 #define KMP_XCHG_FIXED32(p, v) \
862  __sync_lock_test_and_set((volatile kmp_uint32 *)(p), (kmp_uint32)(v))
863 #define KMP_XCHG_FIXED64(p, v) \
864  __sync_lock_test_and_set((volatile kmp_uint64 *)(p), (kmp_uint64)(v))
865 
866 inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) {
867  kmp_int32 tmp =
868  __sync_lock_test_and_set((volatile kmp_uint32 *)(p), *(kmp_uint32 *)&v);
869  return *(kmp_real32 *)&tmp;
870 }
871 
872 inline kmp_real64 KMP_XCHG_REAL64(volatile kmp_real64 *p, kmp_real64 v) {
873  kmp_int64 tmp =
874  __sync_lock_test_and_set((volatile kmp_uint64 *)(p), *(kmp_uint64 *)&v);
875  return *(kmp_real64 *)&tmp;
876 }
877 
878 #else
879 
880 extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v);
881 extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v);
882 extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v);
883 extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v);
884 extern kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v);
885 extern kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 v);
886 extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v);
887 extern kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 v);
888 extern kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 v);
889 
890 extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv,
891  kmp_int8 sv);
892 extern kmp_int16 __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv,
893  kmp_int16 sv);
894 extern kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv,
895  kmp_int32 sv);
896 extern kmp_int32 __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv,
897  kmp_int64 sv);
898 extern kmp_int8 __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv,
899  kmp_int8 sv);
900 extern kmp_int16 __kmp_compare_and_store_ret16(volatile kmp_int16 *p,
901  kmp_int16 cv, kmp_int16 sv);
902 extern kmp_int32 __kmp_compare_and_store_ret32(volatile kmp_int32 *p,
903  kmp_int32 cv, kmp_int32 sv);
904 extern kmp_int64 __kmp_compare_and_store_ret64(volatile kmp_int64 *p,
905  kmp_int64 cv, kmp_int64 sv);
906 
907 extern kmp_int8 __kmp_xchg_fixed8(volatile kmp_int8 *p, kmp_int8 v);
908 extern kmp_int16 __kmp_xchg_fixed16(volatile kmp_int16 *p, kmp_int16 v);
909 extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v);
910 extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v);
911 extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v);
912 extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);
913 
914 #define KMP_TEST_THEN_INC32(p) \
915  __kmp_test_then_add32((volatile kmp_int32 *)(p), 1)
916 #define KMP_TEST_THEN_INC_ACQ32(p) \
917  __kmp_test_then_add32((volatile kmp_int32 *)(p), 1)
918 #define KMP_TEST_THEN_INC64(p) \
919  __kmp_test_then_add64((volatile kmp_int64 *)(p), 1LL)
920 #define KMP_TEST_THEN_INC_ACQ64(p) \
921  __kmp_test_then_add64((volatile kmp_int64 *)(p), 1LL)
922 #define KMP_TEST_THEN_ADD4_32(p) \
923  __kmp_test_then_add32((volatile kmp_int32 *)(p), 4)
924 #define KMP_TEST_THEN_ADD4_ACQ32(p) \
925  __kmp_test_then_add32((volatile kmp_int32 *)(p), 4)
926 #define KMP_TEST_THEN_ADD4_64(p) \
927  __kmp_test_then_add64((volatile kmp_int64 *)(p), 4LL)
928 #define KMP_TEST_THEN_ADD4_ACQ64(p) \
929  __kmp_test_then_add64((volatile kmp_int64 *)(p), 4LL)
930 #define KMP_TEST_THEN_DEC32(p) \
931  __kmp_test_then_add32((volatile kmp_int32 *)(p), -1)
932 #define KMP_TEST_THEN_DEC_ACQ32(p) \
933  __kmp_test_then_add32((volatile kmp_int32 *)(p), -1)
934 #define KMP_TEST_THEN_DEC64(p) \
935  __kmp_test_then_add64((volatile kmp_int64 *)(p), -1LL)
936 #define KMP_TEST_THEN_DEC_ACQ64(p) \
937  __kmp_test_then_add64((volatile kmp_int64 *)(p), -1LL)
938 #define KMP_TEST_THEN_ADD8(p, v) \
939  __kmp_test_then_add8((volatile kmp_int8 *)(p), (kmp_int8)(v))
940 #define KMP_TEST_THEN_ADD32(p, v) \
941  __kmp_test_then_add32((volatile kmp_int32 *)(p), (kmp_int32)(v))
942 #define KMP_TEST_THEN_ADD64(p, v) \
943  __kmp_test_then_add64((volatile kmp_int64 *)(p), (kmp_int64)(v))
944 
945 #define KMP_TEST_THEN_OR8(p, v) \
946  __kmp_test_then_or8((volatile kmp_int8 *)(p), (kmp_int8)(v))
947 #define KMP_TEST_THEN_AND8(p, v) \
948  __kmp_test_then_and8((volatile kmp_int8 *)(p), (kmp_int8)(v))
949 #define KMP_TEST_THEN_OR32(p, v) \
950  __kmp_test_then_or32((volatile kmp_uint32 *)(p), (kmp_uint32)(v))
951 #define KMP_TEST_THEN_AND32(p, v) \
952  __kmp_test_then_and32((volatile kmp_uint32 *)(p), (kmp_uint32)(v))
953 #define KMP_TEST_THEN_OR64(p, v) \
954  __kmp_test_then_or64((volatile kmp_uint64 *)(p), (kmp_uint64)(v))
955 #define KMP_TEST_THEN_AND64(p, v) \
956  __kmp_test_then_and64((volatile kmp_uint64 *)(p), (kmp_uint64)(v))
957 
958 #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \
959  __kmp_compare_and_store8((volatile kmp_int8 *)(p), (kmp_int8)(cv), \
960  (kmp_int8)(sv))
961 #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \
962  __kmp_compare_and_store8((volatile kmp_int8 *)(p), (kmp_int8)(cv), \
963  (kmp_int8)(sv))
964 #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \
965  __kmp_compare_and_store16((volatile kmp_int16 *)(p), (kmp_int16)(cv), \
966  (kmp_int16)(sv))
967 #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \
968  __kmp_compare_and_store16((volatile kmp_int16 *)(p), (kmp_int16)(cv), \
969  (kmp_int16)(sv))
970 #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \
971  __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
972  (kmp_int32)(sv))
973 #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \
974  __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
975  (kmp_int32)(sv))
976 #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \
977  __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
978  (kmp_int64)(sv))
979 #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \
980  __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
981  (kmp_int64)(sv))
982 
983 #if KMP_ARCH_X86
984 #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \
985  __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
986  (kmp_int32)(sv))
987 #else /* 64 bit pointers */
988 #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \
989  __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
990  (kmp_int64)(sv))
991 #endif /* KMP_ARCH_X86 */
992 
993 #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \
994  __kmp_compare_and_store_ret8((p), (cv), (sv))
995 #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \
996  __kmp_compare_and_store_ret16((p), (cv), (sv))
997 #define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \
998  __kmp_compare_and_store_ret32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
999  (kmp_int32)(sv))
1000 #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \
1001  __kmp_compare_and_store_ret64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
1002  (kmp_int64)(sv))
1003 
1004 #define KMP_XCHG_FIXED8(p, v) \
1005  __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v));
1006 #define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16((p), (v));
1007 #define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32((p), (v));
1008 #define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64((p), (v));
1009 #define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32((p), (v));
1010 #define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v));
1011 
1012 #endif /* KMP_ASM_INTRINS */
1013 
1014 /* ------------- relaxed consistency memory model stuff ------------------ */
1015 
1016 #if KMP_OS_WINDOWS
1017 #ifdef __ABSOFT_WIN
1018 #define KMP_MB() asm("nop")
1019 #define KMP_IMB() asm("nop")
1020 #else
1021 #define KMP_MB() /* _asm{ nop } */
1022 #define KMP_IMB() /* _asm{ nop } */
1023 #endif
1024 #endif /* KMP_OS_WINDOWS */
1025 
1026 #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \
1027  KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
1028 #if KMP_OS_WINDOWS
1029 #undef KMP_MB
1030 #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst)
1031 #else /* !KMP_OS_WINDOWS */
1032 #define KMP_MB() __sync_synchronize()
1033 #endif
1034 #endif
1035 
1036 #ifndef KMP_MB
1037 #define KMP_MB() /* nothing to do */
1038 #endif
1039 
1040 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1041 #if KMP_COMPILER_ICC || KMP_COMPILER_ICX
1042 #define KMP_MFENCE_() _mm_mfence()
1043 #define KMP_SFENCE_() _mm_sfence()
1044 #elif KMP_COMPILER_MSVC
1045 #define KMP_MFENCE_() MemoryBarrier()
1046 #define KMP_SFENCE_() MemoryBarrier()
1047 #else
1048 #define KMP_MFENCE_() __sync_synchronize()
1049 #define KMP_SFENCE_() __sync_synchronize()
1050 #endif
1051 #define KMP_MFENCE() \
1052  if (UNLIKELY(!__kmp_cpuinfo.initialized)) { \
1053  __kmp_query_cpuid(&__kmp_cpuinfo); \
1054  } \
1055  if (__kmp_cpuinfo.flags.sse2) { \
1056  KMP_MFENCE_(); \
1057  }
1058 #define KMP_SFENCE() KMP_SFENCE_()
1059 #else
1060 #define KMP_MFENCE() KMP_MB()
1061 #define KMP_SFENCE() KMP_MB()
1062 #endif
1063 
1064 #ifndef KMP_IMB
1065 #define KMP_IMB() /* nothing to do */
1066 #endif
1067 
1068 #ifndef KMP_ST_REL32
1069 #define KMP_ST_REL32(A, D) (*(A) = (D))
1070 #endif
1071 
1072 #ifndef KMP_ST_REL64
1073 #define KMP_ST_REL64(A, D) (*(A) = (D))
1074 #endif
1075 
1076 #ifndef KMP_LD_ACQ32
1077 #define KMP_LD_ACQ32(A) (*(A))
1078 #endif
1079 
1080 #ifndef KMP_LD_ACQ64
1081 #define KMP_LD_ACQ64(A) (*(A))
1082 #endif
1083 
1084 /* ------------------------------------------------------------------------ */
1085 // FIXME - maybe this should be
1086 //
1087 // #define TCR_4(a) (*(volatile kmp_int32 *)(&a))
1088 // #define TCW_4(a,b) (a) = (*(volatile kmp_int32 *)&(b))
1089 //
1090 // #define TCR_8(a) (*(volatile kmp_int64 *)(a))
1091 // #define TCW_8(a,b) (a) = (*(volatile kmp_int64 *)(&b))
1092 //
1093 // I'm fairly certain this is the correct thing to do, but I'm afraid
1094 // of performance regressions.
1095 
1096 #define TCR_1(a) (a)
1097 #define TCW_1(a, b) (a) = (b)
1098 #define TCR_4(a) (a)
1099 #define TCW_4(a, b) (a) = (b)
1100 #define TCI_4(a) (++(a))
1101 #define TCD_4(a) (--(a))
1102 #define TCR_8(a) (a)
1103 #define TCW_8(a, b) (a) = (b)
1104 #define TCI_8(a) (++(a))
1105 #define TCD_8(a) (--(a))
1106 #define TCR_SYNC_4(a) (a)
1107 #define TCW_SYNC_4(a, b) (a) = (b)
1108 #define TCX_SYNC_4(a, b, c) \
1109  KMP_COMPARE_AND_STORE_REL32((volatile kmp_int32 *)(volatile void *)&(a), \
1110  (kmp_int32)(b), (kmp_int32)(c))
1111 #define TCR_SYNC_8(a) (a)
1112 #define TCW_SYNC_8(a, b) (a) = (b)
1113 #define TCX_SYNC_8(a, b, c) \
1114  KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), \
1115  (kmp_int64)(b), (kmp_int64)(c))
1116 
1117 #if KMP_ARCH_X86 || KMP_ARCH_MIPS
1118 // What about ARM?
1119 #define TCR_PTR(a) ((void *)TCR_4(a))
1120 #define TCW_PTR(a, b) TCW_4((a), (b))
1121 #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_4(a))
1122 #define TCW_SYNC_PTR(a, b) TCW_SYNC_4((a), (b))
1123 #define TCX_SYNC_PTR(a, b, c) ((void *)TCX_SYNC_4((a), (b), (c)))
1124 
1125 #else /* 64 bit pointers */
1126 
1127 #define TCR_PTR(a) ((void *)TCR_8(a))
1128 #define TCW_PTR(a, b) TCW_8((a), (b))
1129 #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_8(a))
1130 #define TCW_SYNC_PTR(a, b) TCW_SYNC_8((a), (b))
1131 #define TCX_SYNC_PTR(a, b, c) ((void *)TCX_SYNC_8((a), (b), (c)))
1132 
1133 #endif /* KMP_ARCH_X86 */
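// Usage sketch (variable name hypothetical): the TCR_/TCW_ wrappers mark reads
// and writes of shared variables that other threads may access concurrently
// (see the FIXME above); today they expand to plain accesses of the right
// width:
//   TCW_PTR(team_ptr, new_team);     // publish a pointer
//   void *cur = TCR_PTR(team_ptr);   // read it back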
1134 
1135 /* If these FTN_{TRUE,FALSE} values change, several places where they are used
1136  to check that the language is Fortran, not C, may also need to change. */
1137 
1138 #ifndef FTN_TRUE
1139 #define FTN_TRUE TRUE
1140 #endif
1141 
1142 #ifndef FTN_FALSE
1143 #define FTN_FALSE FALSE
1144 #endif
1145 
1146 typedef void (*microtask_t)(int *gtid, int *npr, ...);
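// Shape sketch (illustrative only): outlined parallel-region bodies follow
// this signature and receive their remaining arguments through varargs:
//   void __kmp_example_microtask(int *gtid, int *npr, ...) {
//     va_list ap;
//     va_start(ap, npr);
//     int *shared = va_arg(ap, int *); // hypothetical shared variable
//     (void)shared;
//     va_end(ap);
//   }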
1147 
1148 #ifdef USE_VOLATILE_CAST
1149 #define VOLATILE_CAST(x) (volatile x)
1150 #else
1151 #define VOLATILE_CAST(x) (x)
1152 #endif
1153 
1154 #define KMP_WAIT __kmp_wait_4
1155 #define KMP_WAIT_PTR __kmp_wait_4_ptr
1156 #define KMP_EQ __kmp_eq_4
1157 #define KMP_NEQ __kmp_neq_4
1158 #define KMP_LT __kmp_lt_4
1159 #define KMP_GE __kmp_ge_4
1160 #define KMP_LE __kmp_le_4
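// Usage sketch, assuming the __kmp_wait_4 signature declared elsewhere in the
// runtime (spin location, check value, predicate, ITT object):
//   // given: volatile kmp_uint32 flag;
//   KMP_WAIT(&flag, 1U, KMP_EQ, NULL); // spin until flag == 1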
1161 
1162 /* Workaround for Intel(R) 64 code gen bug when taking address of static array
1163  * (Intel(R) 64 Tracker #138) */
1164 #if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX
1165 #define STATIC_EFI2_WORKAROUND
1166 #else
1167 #define STATIC_EFI2_WORKAROUND static
1168 #endif
1169 
1170 // Support for BGET usage
1171 #ifndef KMP_USE_BGET
1172 #define KMP_USE_BGET 1
1173 #endif
1174 
1175 // Switches for OSS builds
1176 #ifndef USE_CMPXCHG_FIX
1177 #define USE_CMPXCHG_FIX 1
1178 #endif
1179 
1180 // Enable dynamic user lock
1181 #define KMP_USE_DYNAMIC_LOCK 1
1182 
1183 // Enable Intel(R) Transactional Synchronization Extensions (Intel(R) TSX) if
1184 // dynamic user lock is turned on
1185 #if KMP_USE_DYNAMIC_LOCK
1186 // Visual Studio can't handle the asm sections in this code
1187 #define KMP_USE_TSX (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_COMPILER_MSVC
1188 #ifdef KMP_USE_ADAPTIVE_LOCKS
1189 #undef KMP_USE_ADAPTIVE_LOCKS
1190 #endif
1191 #define KMP_USE_ADAPTIVE_LOCKS KMP_USE_TSX
1192 #endif
1193 
1194 // Enable conversion of ticks to seconds
1195 #if KMP_STATS_ENABLED
1196 #define KMP_HAVE_TICK_TIME \
1197  (KMP_OS_LINUX && (KMP_MIC || KMP_ARCH_X86 || KMP_ARCH_X86_64))
1198 #endif
1199 
1200 // Warning levels
1201 enum kmp_warnings_level {
1202  kmp_warnings_off = 0, /* No warnings */
1203  kmp_warnings_low, /* Minimal warnings (default) */
1204  kmp_warnings_explicit = 6, /* Explicitly set to ON - more warnings */
1205  kmp_warnings_verbose /* reserved */
1206 };
1207 
1208 #ifdef __cplusplus
1209 } // extern "C"
1210 #endif // __cplusplus
1211 
1212 // Safe C API
1213 #include "kmp_safe_c_api.h"
1214 
1215 // Macros for C++11 atomic functions
1216 #define KMP_ATOMIC_LD(p, order) (p)->load(std::memory_order_##order)
1217 #define KMP_ATOMIC_OP(op, p, v, order) (p)->op(v, std::memory_order_##order)
1218 
1219 // For non-default load/store
1220 #define KMP_ATOMIC_LD_ACQ(p) KMP_ATOMIC_LD(p, acquire)
1221 #define KMP_ATOMIC_LD_RLX(p) KMP_ATOMIC_LD(p, relaxed)
1222 #define KMP_ATOMIC_ST_REL(p, v) KMP_ATOMIC_OP(store, p, v, release)
1223 #define KMP_ATOMIC_ST_RLX(p, v) KMP_ATOMIC_OP(store, p, v, relaxed)
1224 
1225 // For non-default fetch_<op>
1226 #define KMP_ATOMIC_ADD(p, v) KMP_ATOMIC_OP(fetch_add, p, v, acq_rel)
1227 #define KMP_ATOMIC_SUB(p, v) KMP_ATOMIC_OP(fetch_sub, p, v, acq_rel)
1228 #define KMP_ATOMIC_AND(p, v) KMP_ATOMIC_OP(fetch_and, p, v, acq_rel)
1229 #define KMP_ATOMIC_OR(p, v) KMP_ATOMIC_OP(fetch_or, p, v, acq_rel)
1230 #define KMP_ATOMIC_INC(p) KMP_ATOMIC_OP(fetch_add, p, 1, acq_rel)
1231 #define KMP_ATOMIC_DEC(p) KMP_ATOMIC_OP(fetch_sub, p, 1, acq_rel)
1232 #define KMP_ATOMIC_ADD_RLX(p, v) KMP_ATOMIC_OP(fetch_add, p, v, relaxed)
1233 #define KMP_ATOMIC_INC_RLX(p) KMP_ATOMIC_OP(fetch_add, p, 1, relaxed)
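// Usage sketch with a hypothetical counter:
//   std::atomic<kmp_int32> ctr;
//   KMP_ATOMIC_ST_RLX(&ctr, 0);             // ctr.store(0, relaxed)
//   kmp_int32 prev = KMP_ATOMIC_INC(&ctr);  // ctr.fetch_add(1, acq_rel), returns old value
//   kmp_int32 now = KMP_ATOMIC_LD_ACQ(&ctr);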
1234 
1235 // Callers of the following functions cannot see the side effect on "expected".
1236 template <typename T>
1237 bool __kmp_atomic_compare_store(std::atomic<T> *p, T expected, T desired) {
1238  return p->compare_exchange_strong(
1239  expected, desired, std::memory_order_acq_rel, std::memory_order_relaxed);
1240 }
1241 
1242 template <typename T>
1243 bool __kmp_atomic_compare_store_acq(std::atomic<T> *p, T expected, T desired) {
1244  return p->compare_exchange_strong(
1245  expected, desired, std::memory_order_acquire, std::memory_order_relaxed);
1246 }
1247 
1248 template <typename T>
1249 bool __kmp_atomic_compare_store_rel(std::atomic<T> *p, T expected, T desired) {
1250  return p->compare_exchange_strong(
1251  expected, desired, std::memory_order_release, std::memory_order_relaxed);
1252 }
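// Sketch: because "expected" is passed by value, a failed exchange does not
// report the value actually observed; callers that need it re-read explicitly
// (flag is a hypothetical std::atomic<kmp_int32>):
//   while (!__kmp_atomic_compare_store(&flag, (kmp_int32)0, (kmp_int32)1)) {
//     kmp_int32 seen = KMP_ATOMIC_LD_RLX(&flag); // re-inspect, then retry/back off
//     (void)seen;
//   }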
1253 
1254 // Symbol lookup on Linux/Windows
1255 #if KMP_OS_WINDOWS
1256 extern void *__kmp_lookup_symbol(const char *name);
1257 #define KMP_DLSYM(name) __kmp_lookup_symbol(name)
1258 #define KMP_DLSYM_NEXT(name) nullptr
1259 #else
1260 #define KMP_DLSYM(name) dlsym(RTLD_DEFAULT, name)
1261 #define KMP_DLSYM_NEXT(name) dlsym(RTLD_NEXT, name)
1262 #endif
1263 
1264 #endif /* KMP_OS_H */