LLVM OpenMP* Runtime Library
kmp_atomic.cpp
/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for float complex data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e. This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q /= rhs.q;
}

static inline void operator+=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q /= rhs.q;
}

#endif

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID \
  if (gtid == KMP_GTID_UNKNOWN) { \
    gtid = __kmp_entry_gtid(); \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned
// fixed-size integers)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

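// Illustrative sketch (not compiled; expansion shown for reference only):
// ATOMIC_BEGIN(fixed4, add, kmp_int32, void) opens a definition; the invoking
// macro supplies the body and the closing brace:
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     /* ... body and "}" come from the macro that used ATOMIC_BEGIN ... */
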
// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid here.
// For 1- and 2-byte operands a valid gtid is expected; other sizes check it
// before invoking this macro.
#define OP_CRITICAL(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) OP(rhs); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

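// Illustrative sketch (not compiled): OP_CRITICAL(+=, 4i) expands to
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
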
// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions
// which require a critical section, where we predict that they will be
// implemented in the Gnu codegen by calling GOMP_atomic_start() /
// GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we rely on dead code elimination by the build compiler
// to get rid of the useless block of code, saving a needless branch at
// runtime.

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE KMP_CPU_PAUSE()
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
#define OP_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE old_value, new_value; \
    old_value = *(TYPE volatile *)lhs; \
    new_value = old_value OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
      old_value = *(TYPE volatile *)lhs; \
      new_value = old_value OP rhs; \
    } \
  }

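// Illustrative sketch (not compiled; volatile casts elided for clarity):
// OP_CMPXCHG(kmp_real32, 32, +) retries until the 32-bit compare-and-store
// succeeds, recomputing the new value each time the old value changes:
//
//   kmp_real32 old_value = *(kmp_real32 volatile *)lhs;
//   kmp_real32 new_value = old_value + rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                       *(kmp_int32 *)&old_value,
//                                       *(kmp_int32 *)&new_value)) {
//     KMP_DO_PAUSE; // back off, then re-read and recompute
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = old_value + rhs;
//   }
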
#if USE_CMPXCHG_FIX
// 2007-06-25:
// Workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (verified in the generated asm). The compiler
// ignores the volatile qualifier of temp_val in the OP_CMPXCHG macro; this is
// a compiler problem (related tracker is C76005, targeted to 11.0). The asm
// of the workaround has been verified as well.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  { \
    struct _sss { \
      TYPE cmp; \
      kmp_int##BITS *vvv; \
    }; \
    struct _sss old_value, new_value; \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
    new_value.cmp = old_value.cmp OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
      KMP_DO_PAUSE; \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
      new_value.cmp = old_value.cmp OP rhs; \
    } \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#endif

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

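// Illustrative sketch (not compiled): on IA-32 / Intel(R) 64,
// ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 0) produces roughly
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     /* GOMP_FLAG == 0: the OP_GOMP_CRITICAL branch compiles away */
//     KMP_TEST_THEN_ADD32(lhs, +rhs); // lock-free fetch-and-add
//   }
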
// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

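// For reference (illustrative): an OpenMP compiler targeting this runtime may
// translate
//
//   #pragma omp atomic
//   x += 5; // x is a 4-byte signed integer
//
// into a call equivalent to __kmpc_atomic_fixed4_add(&loc, gtid, &x, 5).
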
ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
// MASK - used for alignment check
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
// TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and || */

// ------------------------------------------------------------------------
// Need separate macros for && and || because they have no compound-assignment
// form
// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CRITICAL(= *lhs OP, LCK_ID) \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

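// For reference (illustrative): these back atomic combines of logical values,
// e.g. merging a partial result r2 into a 4-byte integer result r with
//   __kmpc_atomic_fixed4_andl(&loc, gtid, &r, r2); // r = r && r2, atomically
// Fortran .AND. / .OR. updates land here as well (see the note below).
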
/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no C counterpart: */
/* MAX, MIN, .EQV., .NEQV. */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator used to check whether any action is still needed
#define MIN_MAX_CRITSECT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (*lhs OP rhs) { /* still need actions? */ \
    *lhs = rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT(OP, 0); \
    return; \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
  }

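// Note (illustrative): for max, OP is "<", so the loop above keeps retrying
// only while "old_value < rhs" still holds; if another thread installs a
// value >= rhs meanwhile, nothing remains to be done and the loop exits
// without storing.
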
// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT(OP, LCK_ID) \
  } \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  } \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
    } else { \
      KMP_CHECK_GTID; \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
    } \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif
#endif
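// For reference (illustrative): a Fortran update such as
//   !$omp atomic update
//   x = max(x, expr)
// with integer(kind=4) x maps to
//   __kmpc_atomic_fixed4_max(&loc, gtid, &x, expr);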
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need for a complement (~)
// OP is ignored for the critical-section path; ^= ~ is used instead
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */ \
  }

// ------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv

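// Note (illustrative): .NEQV. is a plain bitwise xor, so it reuses
// ATOMIC_CMPXCHG with "^"; .EQV. is xor-then-complement, i.e. the update is
//   *lhs = *lhs ^ ~rhs;
// which is what the "^~" operator argument above expands to.
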
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
  }

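// Illustrative sketch (not compiled): ATOMIC_CRITICAL(float10, add,
// long double, +, 10r, 1) produces roughly
//
//   void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
//                                  long double *lhs, long double rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_float10_add: T#%d\n", gtid));
//     /* GOMP-compat path first (FLAG == 1), then the type-specific lock: */
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//     (*lhs) += (rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   }
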
/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif
#endif
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif
#endif

#if OMP_40_ENABLED

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid here.
// For 1- and 2-byte operands a valid gtid is expected; other sizes check it
// before invoking this macro.
#define OP_CRITICAL_REV(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) = (rhs)OP(*lhs); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_REV(OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned
// fixed-size integers)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
                                                   TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs OP old_value; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs OP old_value; \
    } \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }

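// For reference (illustrative): the "_rev" entry points apply the operator
// with reversed operands, e.g. __kmpc_atomic_fixed4_div_rev atomically does
//   *lhs = rhs / *lhs;
// which serves OpenMP 4.0 forms such as "x = expr / x".
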
// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev

ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
// TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CRITICAL_REV(OP, LCK_ID) \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_rev
ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_rev
ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_rev
ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_rev
#endif
#endif

// routines for complex types
ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
                    1) // __kmpc_atomic_cmplx4_sub_rev
ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
                    1) // __kmpc_atomic_cmplx4_div_rev
ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_rev
ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_rev
ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_rev
ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_rev
ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_rev
ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_rev
#endif
#endif

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
// End of OpenMP 4.0: x = expr binop x for non-commutative operations.

#endif // OMP_40_ENABLED

/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger". */
/* Note: in order to reduce the total number of type combinations, it is */
/* assumed that the compiler converts the RHS to the longest floating-point */
/* type, that is _Quad, before calling any of these routines. */
/* Conversion to _Quad is done by the compiler during calculation, and */
/* conversion back to TYPE happens before the assignment, like: */
/*   *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
/* A performance penalty is expected because software emulation is used. */
/* ------------------------------------------------------------------------ */

#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
                           GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
  }

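// For reference (illustrative): __kmpc_atomic_float10_add_fp(id, gtid, lhs,
// rhs) with a _Quad rhs effectively performs, under the 10r lock,
//   *lhs = (long double)((_Quad)(*lhs) + rhs);
// i.e. the update is computed in _Quad and narrowed back on the store.
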
// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                               LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CRITICAL_REV(OP, LCK_ID) \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// RHS=float8
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8

// RHS=float16 (deprecated, to be removed when we are sure the compiler does
// not use them)
#if KMP_HAVE_QUAD
ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp

ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp

ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_add_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_add_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_div_fp

ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp

ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp

ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp

ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_add_fp
ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_sub_fp
ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_mul_fp
ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_div_fp

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Reverse operations
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp

ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_sub_rev_fp
ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_div_rev_fp
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#endif // KMP_HAVE_QUAD

1814 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1815 // ------------------------------------------------------------------------
1816 // X86 or X86_64: no alignment problems ====================================
1817 #if USE_CMPXCHG_FIX
1818 // workaround for C78287 (complex(kind=4) data type)
1819 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1820  LCK_ID, MASK, GOMP_FLAG) \
1821  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1822  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1823  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1824  }
1825 // end of the second part of the workaround for C78287
1826 #else
1827 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1828  LCK_ID, MASK, GOMP_FLAG) \
1829  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1830  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1831  OP_CMPXCHG(TYPE, BITS, OP) \
1832  }
1833 #endif // USE_CMPXCHG_FIX
1834 #else
1835 // ------------------------------------------------------------------------
1836 // Code for other architectures that don't handle unaligned accesses.
1837 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1838  LCK_ID, MASK, GOMP_FLAG) \
1839  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1840  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1841  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1842  OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1843  } else { \
1844  KMP_CHECK_GTID; \
1845  OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
1846  } \
1847  }
1848 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1849 
1850 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1851  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1852 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1853  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1854 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1855  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1856 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1857  7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1858 
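// Note (illustrative): in the cmplx4 entries above BITS is 64 because
// kmp_cmplx32 is two 4-byte floats, so the whole 8-byte complex value is
// compare-and-swapped as one 64-bit quantity. Assuming the usual
// ATOMIC_BEGIN_MIX signature, the first entry is roughly (sketch):
//
//   void __kmpc_atomic_cmplx4_add_cmplx8(ident_t *id_ref, int gtid,
//                                        kmp_cmplx32 *lhs, kmp_cmplx64 rhs);
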
1859 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1860 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1861 
1862 // ------------------------------------------------------------------------
1863 // Atomic READ routines
1864 
1865 // ------------------------------------------------------------------------
1866 // Beginning of a definition (provides name, parameters, debug trace)
1867 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1868 // fixed)
1869 // OP_ID - operation identifier (add, sub, mul, ...)
1870 // TYPE - operands' type
1871 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1872  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1873  TYPE *loc) { \
1874  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1875  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1876 
1877 // ------------------------------------------------------------------------
1878 // Atomic read of *loc using "compare_and_store_ret" routine
1879 // TYPE - operands' type
1880 // BITS - size in bits, used to distinguish low level calls
1881 // OP - operator
1882 // Note: temp_val introduced in order to force the compiler to read
1883 // *loc only once (w/o it the compiler reads *loc twice)
1884 // TODO: check if it is still necessary
1885 // Returns the old value regardless of the result of the "compare & swap" operation
1886 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1887  { \
1888  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1889  union f_i_union { \
1890  TYPE f_val; \
1891  kmp_int##BITS i_val; \
1892  }; \
1893  union f_i_union old_value; \
1894  temp_val = *loc; \
1895  old_value.f_val = temp_val; \
1896  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1897  (kmp_int##BITS *)loc, \
1898  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1899  *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1900  new_value = old_value.f_val; \
1901  return new_value; \
1902  }
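// Note (illustrative): the compare-and-store here never changes memory -- it
// passes the same snapshot as both the "expected" and the "new" value. Since
// KMP_COMPARE_AND_STORE_RET##BITS returns whatever was actually found at
// *loc, the call yields an atomic BITS-wide read whether or not the initial
// snapshot was stale. A rough sketch of the float4 expansion (simplified,
// volatile casts omitted):
//
//   union { kmp_real32 f_val; kmp_int32 i_val; } old_value;
//   old_value.f_val = *loc;                        // unsynchronized snapshot
//   old_value.i_val = KMP_COMPARE_AND_STORE_RET32(
//       (kmp_int32 *)loc, old_value.i_val, old_value.i_val); // atomic read
//   return old_value.f_val;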
1903 
1904 // -------------------------------------------------------------------------
1905 // Operation on *lhs, rhs bound by critical section
1906 // OP - operator (it's supposed to contain an assignment)
1907 // LCK_ID - lock identifier
1908 // Note: don't check gtid as it should always be valid
1909 // 1-, 2-byte operands: gtid is expected to be valid; other sizes check it before this macro
1910 #define OP_CRITICAL_READ(OP, LCK_ID) \
1911  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1912  \
1913  new_value = (*loc); \
1914  \
1915  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1916 
1917 // -------------------------------------------------------------------------
1918 #ifdef KMP_GOMP_COMPAT
1919 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1920  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1921  KMP_CHECK_GTID; \
1922  OP_CRITICAL_READ(OP, 0); \
1923  return new_value; \
1924  }
1925 #else
1926 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1927 #endif /* KMP_GOMP_COMPAT */
1928 
1929 // -------------------------------------------------------------------------
1930 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1931  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1932  TYPE new_value; \
1933  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1934  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1935  return new_value; \
1936  }
1937 // -------------------------------------------------------------------------
1938 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1939  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1940  TYPE new_value; \
1941  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1942  OP_CMPXCHG_READ(TYPE, BITS, OP) \
1943  }
1944 // ------------------------------------------------------------------------
1945 // Routines for Extended types: long double, _Quad, complex flavours (use
1946 // critical section)
1947 // TYPE_ID, OP_ID, TYPE - detailed above
1948 // OP - operator
1949 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1950 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1951  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1952  TYPE new_value; \
1953  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
1954  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
1955  return new_value; \
1956  }
1957 
1958 // ------------------------------------------------------------------------
1959 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
1960 // value doesn't work.
1961 // Let's return the read value through the additional parameter.
1962 #if (KMP_OS_WINDOWS)
1963 
1964 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
1965  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1966  \
1967  (*out) = (*loc); \
1968  \
1969  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1970 // ------------------------------------------------------------------------
1971 #ifdef KMP_GOMP_COMPAT
1972 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
1973  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1974  KMP_CHECK_GTID; \
1975  OP_CRITICAL_READ_WRK(OP, 0); \
1976  }
1977 #else
1978 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1979 #endif /* KMP_GOMP_COMPAT */
1980 // ------------------------------------------------------------------------
1981 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1982  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1983  TYPE *loc) { \
1984  KMP_DEBUG_ASSERT(__kmp_init_serial); \
1985  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1986 
1987 // ------------------------------------------------------------------------
1988 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1989  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1990  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
1991  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
1992  }
1993 
1994 #endif // KMP_OS_WINDOWS
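// Note (illustrative): under this workaround the cmplx4 read entry returns
// the value through the extra "out" parameter instead of a return value, so
// a caller-side sketch (variable names hypothetical) differs per build:
//
//   kmp_cmplx32 v, x;
//   #if KMP_OS_WINDOWS
//   __kmpc_atomic_cmplx4_rd(&v, id_ref, gtid, &x); // value written to *out
//   #else
//   v = __kmpc_atomic_cmplx4_rd(id_ref, gtid, &x); // value returned
//   #endif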
1995 
1996 // ------------------------------------------------------------------------
1997 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1998 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
1999 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2000  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2001 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2002  KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2003 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2004  KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2005 
2006 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2007 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2008  KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2009 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2010  KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2011 
2012 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2013  1) // __kmpc_atomic_float10_rd
2014 #if KMP_HAVE_QUAD
2015 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2016  1) // __kmpc_atomic_float16_rd
2017 #endif // KMP_HAVE_QUAD
2018 
2019 // Fix for CQ220361 on Windows* OS
2020 #if (KMP_OS_WINDOWS)
2021 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2022  1) // __kmpc_atomic_cmplx4_rd
2023 #else
2024 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2025  1) // __kmpc_atomic_cmplx4_rd
2026 #endif
2027 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2028  1) // __kmpc_atomic_cmplx8_rd
2029 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2030  1) // __kmpc_atomic_cmplx10_rd
2031 #if KMP_HAVE_QUAD
2032 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2033  1) // __kmpc_atomic_cmplx16_rd
2034 #if (KMP_ARCH_X86)
2035 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2036  1) // __kmpc_atomic_float16_a16_rd
2037 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2038  1) // __kmpc_atomic_cmplx16_a16_rd
2039 #endif
2040 #endif
2041 
2042 // ------------------------------------------------------------------------
2043 // Atomic WRITE routines
2044 
2045 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2046  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2047  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2048  KMP_XCHG_FIXED##BITS(lhs, rhs); \
2049  }
2050 // ------------------------------------------------------------------------
2051 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2052  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2053  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2054  KMP_XCHG_REAL##BITS(lhs, rhs); \
2055  }
2056 
2057 // ------------------------------------------------------------------------
2058 // Operation on *lhs, rhs using "compare_and_store" routine
2059 // TYPE - operands' type
2060 // BITS - size in bits, used to distinguish low level calls
2061 // OP - operator
2062 // Note: temp_val introduced in order to force the compiler to read
2063 // *lhs only once (w/o it the compiler reads *lhs twice)
2064 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2065  { \
2066  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2067  TYPE old_value, new_value; \
2068  temp_val = *lhs; \
2069  old_value = temp_val; \
2070  new_value = rhs; \
2071  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2072  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2073  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2074  KMP_CPU_PAUSE(); \
2075  \
2076  temp_val = *lhs; \
2077  old_value = temp_val; \
2078  new_value = rhs; \
2079  } \
2080  }
2081 
2082 // -------------------------------------------------------------------------
2083 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2084  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2085  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2086  OP_CMPXCHG_WR(TYPE, BITS, OP) \
2087  }
2088 
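// Note (illustrative): these *_wr entries are the runtime side of atomic
// write constructs; a plausible (unverified) compiler mapping sketch:
//
//   // #pragma omp atomic write
//   // x = expr;
//   __kmpc_atomic_float8_wr(id_ref, gtid, &x, expr); // x is a double here
//
// Where the target has a native BITS-wide exchange, ATOMIC_XCHG_WR /
// ATOMIC_XCHG_FLOAT_WR use it directly; ATOMIC_CMPXCHG_WR falls back to the
// compare-and-store retry loop above (e.g. 64-bit writes on 32-bit x86).
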
2089 // ------------------------------------------------------------------------
2090 // Routines for Extended types: long double, _Quad, complex flavours (use
2091 // critical section)
2092 // TYPE_ID, OP_ID, TYPE - detailed above
2093 // OP - operator
2094 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2095 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2096  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2097  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2098  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2099  }
2100 // -------------------------------------------------------------------------
2101 
2102 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2103  KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2104 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2105  KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2106 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2107  KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2108 #if (KMP_ARCH_X86)
2109 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2110  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2111 #else
2112 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2113  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2114 #endif
2115 
2116 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2117  KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2118 #if (KMP_ARCH_X86)
2119 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2120  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2121 #else
2122 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2123  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2124 #endif
2125 
2126 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2127  1) // __kmpc_atomic_float10_wr
2128 #if KMP_HAVE_QUAD
2129 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2130  1) // __kmpc_atomic_float16_wr
2131 #endif
2132 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2133 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2134  1) // __kmpc_atomic_cmplx8_wr
2135 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2136  1) // __kmpc_atomic_cmplx10_wr
2137 #if KMP_HAVE_QUAD
2138 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2139  1) // __kmpc_atomic_cmplx16_wr
2140 #if (KMP_ARCH_X86)
2141 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2142  1) // __kmpc_atomic_float16_a16_wr
2143 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2144  1) // __kmpc_atomic_cmplx16_a16_wr
2145 #endif
2146 #endif
2147 
2148 // ------------------------------------------------------------------------
2149 // Atomic CAPTURE routines
2150 
2151 // Beginning of a definition (provides name, parameters, debug trace)
2152 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2153 // fixed)
2154 // OP_ID - operation identifier (add, sub, mul, ...)
2155 // TYPE - operands' type
2156 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2157  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2158  TYPE *lhs, TYPE rhs, int flag) { \
2159  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2160  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2161 
2162 // -------------------------------------------------------------------------
2163 // Operation on *lhs, rhs bound by critical section
2164 // OP - operator (it's supposed to contain an assignment)
2165 // LCK_ID - lock identifier
2166 // Note: don't check gtid as it should always be valid
2167 // 1-, 2-byte operands: gtid is expected to be valid; other sizes check it before this macro
2168 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2169  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2170  \
2171  if (flag) { \
2172  (*lhs) OP rhs; \
2173  new_value = (*lhs); \
2174  } else { \
2175  new_value = (*lhs); \
2176  (*lhs) OP rhs; \
2177  } \
2178  \
2179  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2180  return new_value;
2181 
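// Note (illustrative): "flag" selects which value is captured, matching the
// two forms of an atomic capture construct (sketch):
//
//   { v = x; x = x + e; }   // capture old value -> called with flag == 0
//   { x = x + e; v = x; }   // capture new value -> called with flag != 0
//
// Either way the update itself is performed exactly once, under the lock.
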
2182 // ------------------------------------------------------------------------
2183 #ifdef KMP_GOMP_COMPAT
2184 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) \
2185  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2186  KMP_CHECK_GTID; \
2187  OP_CRITICAL_CPT(OP## =, 0); \
2188  }
2189 #else
2190 #define OP_GOMP_CRITICAL_CPT(OP, FLAG)
2191 #endif /* KMP_GOMP_COMPAT */
2192 
2193 // ------------------------------------------------------------------------
2194 // Operation on *lhs, rhs using "compare_and_store" routine
2195 // TYPE - operands' type
2196 // BITS - size in bits, used to distinguish low level calls
2197 // OP - operator
2198 // Note: temp_val introduced in order to force the compiler to read
2199 // *lhs only once (w/o it the compiler reads *lhs twice)
2200 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2201  { \
2202  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2203  TYPE old_value, new_value; \
2204  temp_val = *lhs; \
2205  old_value = temp_val; \
2206  new_value = old_value OP rhs; \
2207  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2208  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2209  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2210  KMP_CPU_PAUSE(); \
2211  \
2212  temp_val = *lhs; \
2213  old_value = temp_val; \
2214  new_value = old_value OP rhs; \
2215  } \
2216  if (flag) { \
2217  return new_value; \
2218  } else \
2219  return old_value; \
2220  }
2221 
2222 // -------------------------------------------------------------------------
2223 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2224  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2225  TYPE new_value; \
2226  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2227  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2228  }
2229 
2230 // -------------------------------------------------------------------------
2231 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2232  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2233  TYPE old_value, new_value; \
2234  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2235  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2236  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2237  if (flag) { \
2238  return old_value OP rhs; \
2239  } else \
2240  return old_value; \
2241  }
2242 // -------------------------------------------------------------------------
2243 
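// Note (illustrative): ATOMIC_FIXED_ADD_CPT captures without a retry loop:
// the fetch-and-add returns the pre-update value and the post-update value
// is recomputed locally. E.g. for the fixed4 sub_cpt instantiation below,
// OP is "-", so the expansion is roughly:
//
//   old_value = KMP_TEST_THEN_ADD32(lhs, -rhs);  // atomically *lhs += -rhs
//   return flag ? old_value - rhs : old_value;   // new vs. old value
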
2244 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2245  0) // __kmpc_atomic_fixed4_add_cpt
2246 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2247  0) // __kmpc_atomic_fixed4_sub_cpt
2248 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2249  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2250 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2251  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2252 
2253 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2254  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2255 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2256  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2257 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2258  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2259 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2260  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2261 
2262 // ------------------------------------------------------------------------
2263 // Entries definition for integer operands
2264 // TYPE_ID - operands type and size (fixed4, float4)
2265 // OP_ID - operation identifier (add, sub, mul, ...)
2266 // TYPE - operand type
2267 // BITS - size in bits, used to distinguish low level calls
2268 // OP - operator (used in critical section)
2269 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2270 // ------------------------------------------------------------------------
2271 // Routines for ATOMIC integer operands, other operators
2272 // ------------------------------------------------------------------------
2273 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2274 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2275  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2276 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2277  0) // __kmpc_atomic_fixed1_andb_cpt
2278 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2279  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2280 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2281  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2282 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2283  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2284 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2285  0) // __kmpc_atomic_fixed1_orb_cpt
2286 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2287  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2288 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2289  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2290 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2291  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2292 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2293  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2294 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2295  0) // __kmpc_atomic_fixed1_xor_cpt
2296 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2297  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2298 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2299  0) // __kmpc_atomic_fixed2_andb_cpt
2300 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2301  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2302 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2303  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2304 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2305  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2306 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2307  0) // __kmpc_atomic_fixed2_orb_cpt
2308 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2309  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2310 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2311  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2312 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2313  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2314 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2315  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2316 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2317  0) // __kmpc_atomic_fixed2_xor_cpt
2318 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2319  0) // __kmpc_atomic_fixed4_andb_cpt
2320 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2321  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2322 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2323  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2324 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2325  KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2326 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2327  0) // __kmpc_atomic_fixed4_orb_cpt
2328 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2329  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2330 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2331  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2332 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2333  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2334 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2335  0) // __kmpc_atomic_fixed4_xor_cpt
2336 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2337  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2338 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2339  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2340 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2341  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2342 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2343  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2344 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2345  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2347  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2349  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2351  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2352 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2353  KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2354 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2355  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2356 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2357  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2358 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2359  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2360 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2361  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2362 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2363 
2364 // CAPTURE routines for mixed types RHS=float16
2365 #if KMP_HAVE_QUAD
2366 
2367 // Beginning of a definition (provides name, parameters, debug trace)
2368 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2369 // fixed)
2370 // OP_ID - operation identifier (add, sub, mul, ...)
2371 // TYPE - operands' type
2372 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2373  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2374  ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2375  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2376  KA_TRACE(100, \
2377  ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2378  gtid));
2379 
2380 // -------------------------------------------------------------------------
2381 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2382  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2383  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2384  TYPE new_value; \
2385  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2386  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2387  }
2388 
2389 // -------------------------------------------------------------------------
2390 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2391  LCK_ID, GOMP_FLAG) \
2392  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2393  TYPE new_value; \
2394  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
2395  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
2396  }
2397 
2398 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2399  KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2400 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2401  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2402 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2403  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2404 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2405  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2406 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2407  KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2408 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2409  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2410 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2411  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2412 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2413  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2414 
2415 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2416  KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2417 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2418  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2419 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2420  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2421 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2422  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2423 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2424  KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2425 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2426  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2427 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2428  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2429 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2430  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2431 
2432 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2433  0) // __kmpc_atomic_fixed4_add_cpt_fp
2434 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2435  0) // __kmpc_atomic_fixed4u_add_cpt_fp
2436 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2437  0) // __kmpc_atomic_fixed4_sub_cpt_fp
2438 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2439  0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2440 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2441  0) // __kmpc_atomic_fixed4_mul_cpt_fp
2442 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2443  0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2444 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2445  0) // __kmpc_atomic_fixed4_div_cpt_fp
2446 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2447  0) // __kmpc_atomic_fixed4u_div_cpt_fp
2448 
2449 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2450  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2451 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2452  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2453 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2454  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2455 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2456  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2457 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2458  KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2459 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2460  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2461 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2462  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2463 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2464  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2465 
2466 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2467  KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2468 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2469  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2471  KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2472 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2473  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2474 
2475 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2476  KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2477 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2478  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2479 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2480  KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2481 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2482  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2483 
2484 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2485  1) // __kmpc_atomic_float10_add_cpt_fp
2486 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2487  1) // __kmpc_atomic_float10_sub_cpt_fp
2488 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2489  1) // __kmpc_atomic_float10_mul_cpt_fp
2490 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2491  1) // __kmpc_atomic_float10_div_cpt_fp
2492 
2493 #endif // KMP_HAVE_QUAD
2494 
2495 // ------------------------------------------------------------------------
2496 // Routines for C/C++ Reduction operators && and ||
2497 
2498 // -------------------------------------------------------------------------
2499 // Operation on *lhs, rhs bound by critical section
2500 // OP - operator (it's supposed to contain an assignment)
2501 // LCK_ID - lock identifier
2502 // Note: don't check gtid as it should always be valid
2503 // 1-, 2-byte operands: gtid is expected to be valid; other sizes check it before this macro
2504 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2505  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2506  \
2507  if (flag) { \
2508  new_value OP rhs; \
2509  } else \
2510  new_value = (*lhs); \
2511  \
2512  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2513 
2514 // ------------------------------------------------------------------------
2515 #ifdef KMP_GOMP_COMPAT
2516 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2517  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2518  KMP_CHECK_GTID; \
2519  OP_CRITICAL_L_CPT(OP, 0); \
2520  return new_value; \
2521  }
2522 #else
2523 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2524 #endif /* KMP_GOMP_COMPAT */
2525 
2526 // ------------------------------------------------------------------------
2527 // Need separate macros for &&, || because there is no combined assignment
2528 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2529  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2530  TYPE new_value; \
2531  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2532  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2533  }
2534 
2535 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2536  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2537 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2538  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2539 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2540  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2541 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2542  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2543 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2544  0) // __kmpc_atomic_fixed4_andl_cpt
2545 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2546  0) // __kmpc_atomic_fixed4_orl_cpt
2547 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2548  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2549 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2550  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2551 
2552 // -------------------------------------------------------------------------
2553 // Routines for Fortran operators that have no C counterpart:
2554 // MAX, MIN, .EQV., .NEQV.
2555 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2556 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2557 
2558 // -------------------------------------------------------------------------
2559 // MIN and MAX need separate macros
2560 // OP - operator used to check whether any action is needed
2561 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2562  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2563  \
2564  if (*lhs OP rhs) { /* still need actions? */ \
2565  old_value = *lhs; \
2566  *lhs = rhs; \
2567  if (flag) \
2568  new_value = rhs; \
2569  else \
2570  new_value = old_value; \
2571  } \
2572  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2573  return new_value;
2574 
2575 // -------------------------------------------------------------------------
2576 #ifdef KMP_GOMP_COMPAT
2577 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2578  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2579  KMP_CHECK_GTID; \
2580  MIN_MAX_CRITSECT_CPT(OP, 0); \
2581  }
2582 #else
2583 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2584 #endif /* KMP_GOMP_COMPAT */
2585 
2586 // -------------------------------------------------------------------------
2587 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2588  { \
2589  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2590  /*TYPE old_value; */ \
2591  temp_val = *lhs; \
2592  old_value = temp_val; \
2593  while (old_value OP rhs && /* still need actions? */ \
2594  !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2595  (kmp_int##BITS *)lhs, \
2596  *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2597  *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2598  KMP_CPU_PAUSE(); \
2599  temp_val = *lhs; \
2600  old_value = temp_val; \
2601  } \
2602  if (flag) \
2603  return rhs; \
2604  else \
2605  return old_value; \
2606  }
2607 
2608 // -------------------------------------------------------------------------
2609 // 1-byte, 2-byte operands - use critical section
2610 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2611  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2612  TYPE new_value, old_value; \
2613  if (*lhs OP rhs) { /* need actions? */ \
2614  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2615  MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2616  } \
2617  return *lhs; \
2618  }
2619 
2620 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2621  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2622  TYPE new_value, old_value; \
2623  if (*lhs OP rhs) { \
2624  GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2625  MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2626  } \
2627  return *lhs; \
2628  }
2629 
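// Note (illustrative): the unsynchronized pre-check "if (*lhs OP rhs)" in
// the two capture macros above is only a fast path; the same comparison is
// re-evaluated under the lock / inside the CAS loop, so a stale first read
// can at worst cause a harmless early "return *lhs", never a wrong store.
// E.g. for a max_cpt instantiation OP is "<": a store is attempted only
// while *lhs < rhs still holds, and the captured result is
// (flag ? rhs : old_value).
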
2630 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2631  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2632 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2633  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2634 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2635  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2636 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2637  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2638 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2639  0) // __kmpc_atomic_fixed4_max_cpt
2640 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2641  0) // __kmpc_atomic_fixed4_min_cpt
2642 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2643  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2644 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2645  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2646 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2647  KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2648 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2649  KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2650 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2651  KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2652 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2653  KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2654 #if KMP_HAVE_QUAD
2655 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2656  1) // __kmpc_atomic_float16_max_cpt
2657 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2658  1) // __kmpc_atomic_float16_min_cpt
2659 #if (KMP_ARCH_X86)
2660 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2661  1) // __kmpc_atomic_float16_max_a16_cpt
2662 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2663  1) // __kmpc_atomic_float16_min_a16_cpt
2664 #endif
2665 #endif
2666 
2667 // ------------------------------------------------------------------------
2668 #ifdef KMP_GOMP_COMPAT
2669 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2670  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2671  KMP_CHECK_GTID; \
2672  OP_CRITICAL_CPT(OP, 0); \
2673  }
2674 #else
2675 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2676 #endif /* KMP_GOMP_COMPAT */
2677 // ------------------------------------------------------------------------
2678 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2679  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2680  TYPE new_value; \
2681  OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \
2682  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2683  }
2684 
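// Note (illustrative): .EQV. is bitwise equivalence, ~(a ^ b), which equals
// a ^ ~b; hence the "^~" operator token in the eqv_cpt instantiations below
// and the "^= ~" form sent to the GOMP critical path. Inside OP_CMPXCHG_CPT
// this expands to:
//
//   new_value = old_value ^~ rhs;   // i.e. old_value ^ (~rhs)
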
2685 // ------------------------------------------------------------------------
2686 
2687 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2688  KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2689 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2690  KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2691 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2692  KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2693 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2694  KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2695 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2696  KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2697 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2698  KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2699 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2700  KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2701 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2702  KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2703 
2704 // ------------------------------------------------------------------------
2705 // Routines for Extended types: long double, _Quad, complex flavours (use
2706 // critical section)
2707 // TYPE_ID, OP_ID, TYPE - detailed above
2708 // OP - operator
2709 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2710 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2711  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2712  TYPE new_value; \
2713  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
2714  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
2715  }
2716 
2717 // ------------------------------------------------------------------------
2718 // Workaround for cmplx4. Regular routines with return value don't work
2719 // on Win_32e. Let's return captured values through the additional parameter.
2720 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2721  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2722  \
2723  if (flag) { \
2724  (*lhs) OP rhs; \
2725  (*out) = (*lhs); \
2726  } else { \
2727  (*out) = (*lhs); \
2728  (*lhs) OP rhs; \
2729  } \
2730  \
2731  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2732  return;
2733 // ------------------------------------------------------------------------
2734 
2735 #ifdef KMP_GOMP_COMPAT
2736 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2737  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2738  KMP_CHECK_GTID; \
2739  OP_CRITICAL_CPT_WRK(OP## =, 0); \
2740  }
2741 #else
2742 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2743 #endif /* KMP_GOMP_COMPAT */
2744 // ------------------------------------------------------------------------
2745 
2746 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2747  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2748  TYPE rhs, TYPE *out, int flag) { \
2749  KMP_DEBUG_ASSERT(__kmp_init_serial); \
2750  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2751 // ------------------------------------------------------------------------
2752 
2753 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2754  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2755  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2756  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2757  }
2758 // The end of workaround for cmplx4
2759 
2760 /* ------------------------------------------------------------------------- */
2761 // routines for long double type
2762 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2763  1) // __kmpc_atomic_float10_add_cpt
2764 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2765  1) // __kmpc_atomic_float10_sub_cpt
2766 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2767  1) // __kmpc_atomic_float10_mul_cpt
2768 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2769  1) // __kmpc_atomic_float10_div_cpt
2770 #if KMP_HAVE_QUAD
2771 // routines for _Quad type
2772 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2773  1) // __kmpc_atomic_float16_add_cpt
2774 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2775  1) // __kmpc_atomic_float16_sub_cpt
2776 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2777  1) // __kmpc_atomic_float16_mul_cpt
2778 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2779  1) // __kmpc_atomic_float16_div_cpt
2780 #if (KMP_ARCH_X86)
2781 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2782  1) // __kmpc_atomic_float16_add_a16_cpt
2783 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2784  1) // __kmpc_atomic_float16_sub_a16_cpt
2785 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2786  1) // __kmpc_atomic_float16_mul_a16_cpt
2787 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2788  1) // __kmpc_atomic_float16_div_a16_cpt
2789 #endif
2790 #endif
2791 
2792 // routines for complex types
2793 
2794 // cmplx4 routines to return void
2795 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2796  1) // __kmpc_atomic_cmplx4_add_cpt
2797 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2798  1) // __kmpc_atomic_cmplx4_sub_cpt
2799 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2800  1) // __kmpc_atomic_cmplx4_mul_cpt
2801 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2802  1) // __kmpc_atomic_cmplx4_div_cpt
2803 
2804 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2805  1) // __kmpc_atomic_cmplx8_add_cpt
2806 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2807  1) // __kmpc_atomic_cmplx8_sub_cpt
2808 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2809  1) // __kmpc_atomic_cmplx8_mul_cpt
2810 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2811  1) // __kmpc_atomic_cmplx8_div_cpt
2812 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2813  1) // __kmpc_atomic_cmplx10_add_cpt
2814 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2815  1) // __kmpc_atomic_cmplx10_sub_cpt
2816 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2817  1) // __kmpc_atomic_cmplx10_mul_cpt
2818 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2819  1) // __kmpc_atomic_cmplx10_div_cpt
2820 #if KMP_HAVE_QUAD
2821 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2822  1) // __kmpc_atomic_cmplx16_add_cpt
2823 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2824  1) // __kmpc_atomic_cmplx16_sub_cpt
2825 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2826  1) // __kmpc_atomic_cmplx16_mul_cpt
2827 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2828  1) // __kmpc_atomic_cmplx16_div_cpt
2829 #if (KMP_ARCH_X86)
2830 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2831  1) // __kmpc_atomic_cmplx16_add_a16_cpt
2832 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2833  1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2834 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2835  1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2836 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2837  1) // __kmpc_atomic_cmplx16_div_a16_cpt
2838 #endif
2839 #endif
2840 
2841 #if OMP_40_ENABLED
2842 
2843 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } and
2844 // { x = expr binop x; v = x; } for non-commutative operations.
2845 // Supported only on IA-32 architecture and Intel(R) 64
2846 
2847 // -------------------------------------------------------------------------
2848 // Operation on *lhs, rhs bound by critical section
2849 // OP - operator (it's supposed to contain an assignment)
2850 // LCK_ID - lock identifier
2851 // Note: don't check gtid as it should always be valid
2852 // 1-, 2-byte operands: gtid is expected to be valid; other sizes check it before this macro
2853 #define OP_CRITICAL_CPT_REV(OP, LCK_ID) \
2854  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2855  \
2856  if (flag) { \
2857  /*temp_val = (*lhs);*/ \
2858  (*lhs) = (rhs)OP(*lhs); \
2859  new_value = (*lhs); \
2860  } else { \
2861  new_value = (*lhs); \
2862  (*lhs) = (rhs)OP(*lhs); \
2863  } \
2864  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2865  return new_value;
2866 
2867 // ------------------------------------------------------------------------
2868 #ifdef KMP_GOMP_COMPAT
2869 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \
2870  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2871  KMP_CHECK_GTID; \
2872  OP_CRITICAL_CPT_REV(OP, 0); \
2873  }
2874 #else
2875 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
2876 #endif /* KMP_GOMP_COMPAT */
2877 
2878 // ------------------------------------------------------------------------
2879 // Operation on *lhs, rhs using "compare_and_store" routine
2880 // TYPE - operands' type
2881 // BITS - size in bits, used to distinguish low level calls
2882 // OP - operator
2883 // Note: temp_val introduced in order to force the compiler to read
2884 // *lhs only once (w/o it the compiler reads *lhs twice)
2885 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2886  { \
2887  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2888  TYPE old_value, new_value; \
2889  temp_val = *lhs; \
2890  old_value = temp_val; \
2891  new_value = rhs OP old_value; \
2892  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2893  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2894  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2895  KMP_CPU_PAUSE(); \
2896  \
2897  temp_val = *lhs; \
2898  old_value = temp_val; \
2899  new_value = rhs OP old_value; \
2900  } \
2901  if (flag) { \
2902  return new_value; \
2903  } else \
2904  return old_value; \
2905  }
2906 
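// Note (illustrative): the _rev forms implement "x = expr binop x" -- the
// operand order is swapped relative to OP_CMPXCHG_CPT. E.g. for the fixed8
// sub_cpt_rev instantiation below the loop computes:
//
//   new_value = rhs - old_value;    // not old_value - rhs
//
// and, as above, returns new_value or old_value depending on "flag".
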
2907 // -------------------------------------------------------------------------
2908 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2909  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2910  TYPE new_value; \
2911  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2912  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
2913  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2914  }
2915 
2916 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2917  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2918 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2919  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2920 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2921  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2922 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2923  KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2924 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2925  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2926 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2927  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2928 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2929  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2930 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2931  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2932 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2933  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2934 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2935  KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2936 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2937  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2938 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2939  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2940 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2941  KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2942 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2943  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2944 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2945  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2946 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2947  KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2948 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2949  KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2950 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2951  KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2952 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2953  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2954 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2955  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2956 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2957  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2958 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2959  KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2960 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2961  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2962 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2963  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2964 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2965  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2966 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2967  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2968 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2969  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
2970 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
2971  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
2972 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2973 
2974 // ------------------------------------------------------------------------
2975 // Routines for Extended types: long double, _Quad, complex flavours (use
2976 // critical section)
2977 // TYPE_ID, OP_ID, TYPE - detailed above
2978 // OP - operator
2979 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2980 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2981  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2982  TYPE new_value; \
2983  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2984  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
2985  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
2986  OP_CRITICAL_CPT_REV(OP, LCK_ID) \
2987  }
2988 
2989 /* ------------------------------------------------------------------------- */
2990 // routines for long double type
2991 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
2992  1) // __kmpc_atomic_float10_sub_cpt_rev
2993 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
2994  1) // __kmpc_atomic_float10_div_cpt_rev
2995 #if KMP_HAVE_QUAD
2996 // routines for _Quad type
2997 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
2998  1) // __kmpc_atomic_float16_sub_cpt_rev
2999 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3000  1) // __kmpc_atomic_float16_div_cpt_rev
3001 #if (KMP_ARCH_X86)
3002 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3003  1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3004 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3005  1) // __kmpc_atomic_float16_div_a16_cpt_rev
3006 #endif
3007 #endif
3008 
3009 // routines for complex types
3010 
3011 // ------------------------------------------------------------------------
3012 // Workaround for cmplx4. Regular routines with return value don't work
3013 // on Win_32e. Let's return captured values through the additional parameter.
3014 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3015  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3016  \
3017  if (flag) { \
3018  (*lhs) = (rhs)OP(*lhs); \
3019  (*out) = (*lhs); \
3020  } else { \
3021  (*out) = (*lhs); \
3022  (*lhs) = (rhs)OP(*lhs); \
3023  } \
3024  \
3025  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3026  return;
3027 // ------------------------------------------------------------------------
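// Hedged sketch (hypothetical helper, for exposition only): without the
// locking, the workaround above amounts to returning the captured value
// through the extra 'out' parameter instead of by value, so the routine
// itself can stay void:
static inline void __kmp_sketch_cmplx4_sub_cpt_rev(kmp_cmplx32 *lhs,
                                                   kmp_cmplx32 rhs,
                                                   kmp_cmplx32 *out, int flag) {
  if (flag) { // capture after the update
    (*lhs) = rhs - (*lhs);
    (*out) = (*lhs);
  } else { // capture before the update
    (*out) = (*lhs);
    (*lhs) = rhs - (*lhs);
  }
}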
3028 
3029 #ifdef KMP_GOMP_COMPAT
3030 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3031  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3032  KMP_CHECK_GTID; \
3033  OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3034  }
3035 #else
3036 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3037 #endif /* KMP_GOMP_COMPAT */
3038 // ------------------------------------------------------------------------
3039 
3040 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3041  GOMP_FLAG) \
3042  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3043  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3044  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3045  }
3046 // End of the workaround for cmplx4
3047 
3048 // !!! TODO: check if we need to return void for cmplx4 routines
3049 // cmplx4 routines to return void
3050 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3051  1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3052 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3053  1) // __kmpc_atomic_cmplx4_div_cpt_rev
3054 
3055 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3056  1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3057 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3058  1) // __kmpc_atomic_cmplx8_div_cpt_rev
3059 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3060  1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3061 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3062  1) // __kmpc_atomic_cmplx10_div_cpt_rev
3063 #if KMP_HAVE_QUAD
3064 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3065  1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3066 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3067  1) // __kmpc_atomic_cmplx16_div_cpt_rev
3068 #if (KMP_ARCH_X86)
3069 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3070  1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3071 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3072  1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3073 #endif
3074 #endif
3075 
3076 // Capture reverse for mixed type: RHS=float16
3077 #if KMP_HAVE_QUAD
3078 
3079 // Beginning of a definition (provides name, parameters, debug trace)
3080 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed and
3081 // unsigned fixed-width integers)
3082 // OP_ID - operation identifier (add, sub, mul, ...)
3083 // TYPE - operands' type
3084 // -------------------------------------------------------------------------
3085 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3086  RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3087  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3088  TYPE new_value; \
3089  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
3090  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3091  }
3092 
3093 // -------------------------------------------------------------------------
3094 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3095  LCK_ID, GOMP_FLAG) \
3096  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3097  TYPE new_value; \
3098  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \
3099  OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \
3100  }
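// Hedged sketch (hypothetical helper): in the mixed-type instantiations
// below, the left operand keeps its own type while the right-hand side
// arrives as _Quad and is converted at the point of the reversed operation,
// e.g. for 4-byte signed subtraction:
static inline kmp_int32 __kmp_sketch_fixed4_sub_cpt_rev_fp(kmp_int32 *lhs,
                                                           _Quad rhs,
                                                           int flag) {
  kmp_int32 new_value = (kmp_int32)(rhs - (_Quad)(*lhs));
  kmp_int32 captured = flag ? new_value : (*lhs);
  (*lhs) = new_value;
  return captured;
}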
3101 
3102 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3103  KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3104 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3105  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3106 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3107  KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3108 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3109  KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3110 
3111 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3112  KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3113 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3114  1,
3115  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3116 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3117  KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3118 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3119  1,
3120  KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3121 
3122 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3123  3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3124 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3125  4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3126 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3127  3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3128 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3129  4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3130 
3131 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3132  7,
3133  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3134 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3135  8i, 7,
3136  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3137 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3138  7,
3139  KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3140 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3141  8i, 7,
3142  KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3143 
3144 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3145  4r, 3,
3146  KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3147 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3148  4r, 3,
3149  KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3150 
3151 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3152  8r, 7,
3153  KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3154 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3155  8r, 7,
3156  KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3157 
3158 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3159  10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3160 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3161  10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3162 
3163 #endif // KMP_HAVE_QUAD
3164 
3165 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3166 
3167 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3168  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3169  TYPE rhs) { \
3170  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3171  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3172 
3173 #define CRITICAL_SWP(LCK_ID) \
3174  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3175  \
3176  old_value = (*lhs); \
3177  (*lhs) = rhs; \
3178  \
3179  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3180  return old_value;
3181 
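// Hedged illustration (hypothetical helper): CRITICAL_SWP implements the
// capture-write semantics noted above, {v = x; x = expr;}; with the lock
// stripped away it is simply:
static inline long double __kmp_sketch_swp(long double *lhs, long double rhs) {
  long double old_value = (*lhs); // v = x
  (*lhs) = rhs;                   // x = expr
  return old_value;               // caller receives v
}
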
3182 // ------------------------------------------------------------------------
3183 #ifdef KMP_GOMP_COMPAT
3184 #define GOMP_CRITICAL_SWP(FLAG) \
3185  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3186  KMP_CHECK_GTID; \
3187  CRITICAL_SWP(0); \
3188  }
3189 #else
3190 #define GOMP_CRITICAL_SWP(FLAG)
3191 #endif /* KMP_GOMP_COMPAT */
3192 
3193 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3194  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3195  TYPE old_value; \
3196  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3197  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3198  return old_value; \
3199  }
3200 // ------------------------------------------------------------------------
3201 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3202  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3203  TYPE old_value; \
3204  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3205  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3206  return old_value; \
3207  }
3208 
3209 // ------------------------------------------------------------------------
3210 #define CMPXCHG_SWP(TYPE, BITS) \
3211  { \
3212  TYPE KMP_ATOMIC_VOLATILE temp_val; \
3213  TYPE old_value, new_value; \
3214  temp_val = *lhs; \
3215  old_value = temp_val; \
3216  new_value = rhs; \
3217  while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3218  (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3219  *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3220  KMP_CPU_PAUSE(); \
3221  \
3222  temp_val = *lhs; \
3223  old_value = temp_val; \
3224  new_value = rhs; \
3225  } \
3226  return old_value; \
3227  }
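// Hedged standalone sketch of the same retry loop, written against the
// GCC/Clang __sync_bool_compare_and_swap builtin instead of the runtime's
// KMP_COMPARE_AND_STORE_ACQ* wrappers (illustration only; assumes a
// GCC-compatible compiler):
static inline kmp_int32 __kmp_sketch_cmpxchg_swp32(kmp_int32 *lhs,
                                                   kmp_int32 rhs) {
  kmp_int32 old_value = *lhs;
  // Retry until no other thread changed *lhs between the read and the CAS.
  while (!__sync_bool_compare_and_swap(lhs, old_value, rhs)) {
    KMP_CPU_PAUSE();
    old_value = *lhs;
  }
  return old_value;
}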
3228 
3229 // -------------------------------------------------------------------------
3230 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3231  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3232  TYPE old_value; \
3233  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3234  CMPXCHG_SWP(TYPE, BITS) \
3235  }
3236 
3237 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3238 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3239 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3240 
3241 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3242  KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3243 
3244 #if (KMP_ARCH_X86)
3245 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3246  KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3247 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3248  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3249 #else
3250 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3251 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3252  KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3253 #endif
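// Editorial note (hedged): 32-bit x86 has no 8-byte atomic exchange
// instruction, so the branch above falls back to a cmpxchg8b-style retry loop
// (ATOMIC_CMPXCHG_SWP) for 8-byte operands, while other targets can use a
// plain atomic exchange (ATOMIC_XCHG_SWP / ATOMIC_XCHG_FLOAT_SWP).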
3254 
3255 // ------------------------------------------------------------------------
3256 // Routines for Extended types: long double, _Quad, complex flavours (use
3257 // critical section)
3258 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3259  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3260  TYPE old_value; \
3261  GOMP_CRITICAL_SWP(GOMP_FLAG) \
3262  CRITICAL_SWP(LCK_ID) \
3263  }
3264 
3265 // ------------------------------------------------------------------------
3266 // !!! TODO: check if we need to return void for cmplx4 routines
3267 // Workaround for cmplx4. Regular routines with return value don't work
3268 // on Win_32e. Let's return captured values through the additional parameter.
3269 
3270 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3271  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3272  TYPE rhs, TYPE *out) { \
3273  KMP_DEBUG_ASSERT(__kmp_init_serial); \
3274  KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3275 
3276 #define CRITICAL_SWP_WRK(LCK_ID) \
3277  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3278  \
3279  tmp = (*lhs); \
3280  (*lhs) = (rhs); \
3281  (*out) = tmp; \
3282  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3283  return;
3284 // ------------------------------------------------------------------------
3285 
3286 #ifdef KMP_GOMP_COMPAT
3287 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
3288  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3289  KMP_CHECK_GTID; \
3290  CRITICAL_SWP_WRK(0); \
3291  }
3292 #else
3293 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3294 #endif /* KMP_GOMP_COMPAT */
3295 // ------------------------------------------------------------------------
3296 
3297 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3298  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3299  TYPE tmp; \
3300  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3301  CRITICAL_SWP_WRK(LCK_ID) \
3302  }
3303 // End of the workaround for cmplx4
3304 
3305 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3306 #if KMP_HAVE_QUAD
3307 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3308 #endif
3309 // cmplx4 routine to return void
3310 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3311 
3312 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3313 // __kmpc_atomic_cmplx4_swp
3314 
3315 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3316 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3317 #if KMP_HAVE_QUAD
3318 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3319 #if (KMP_ARCH_X86)
3320 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3321  1) // __kmpc_atomic_float16_a16_swp
3322 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3323  1) // __kmpc_atomic_cmplx16_a16_swp
3324 #endif
3325 #endif
3326 
3327 // End of OpenMP 4.0 Capture
3328 
3329 #endif // OMP_40_ENABLED
3330 
3331 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3332 
3333 #undef OP_CRITICAL
3334 
3335 /* ------------------------------------------------------------------------ */
3336 /* Generic atomic routines */
3337 
3338 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3339  void (*f)(void *, void *, void *)) {
3340  KMP_DEBUG_ASSERT(__kmp_init_serial);
3341 
3342  if (
3343 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3344  FALSE /* must use lock */
3345 #else
3346  TRUE
3347 #endif
3348  ) {
3349  kmp_int8 old_value, new_value;
3350 
3351  old_value = *(kmp_int8 *)lhs;
3352  (*f)(&new_value, &old_value, rhs);
3353 
3354  /* TODO: Should this be acquire or release? */
3355  while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3356  *(kmp_int8 *)&new_value)) {
3357  KMP_CPU_PAUSE();
3358 
3359  old_value = *(kmp_int8 *)lhs;
3360  (*f)(&new_value, &old_value, rhs);
3361  }
3362 
3363  return;
3364  } else {
3365 // All 1-byte data is of integer data type.
3366 
3367 #ifdef KMP_GOMP_COMPAT
3368  if (__kmp_atomic_mode == 2) {
3369  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3370  } else
3371 #endif /* KMP_GOMP_COMPAT */
3372  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3373 
3374  (*f)(lhs, lhs, rhs);
3375 
3376 #ifdef KMP_GOMP_COMPAT
3377  if (__kmp_atomic_mode == 2) {
3378  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3379  } else
3380 #endif /* KMP_GOMP_COMPAT */
3381  __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3382  }
3383 }
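// Hedged usage sketch (hypothetical callback, not emitted by any particular
// compiler): the 'f' argument must compute *out = *op1 OP *op2 for 1-byte
// operands; __kmpc_atomic_1 then applies it atomically via the CAS loop or
// the lock, as above.
static void __kmp_sketch_add8(void *out, void *op1, void *op2) {
  *(kmp_int8 *)out = *(kmp_int8 *)op1 + *(kmp_int8 *)op2;
}
static inline void __kmp_sketch_atomic_1_usage(ident_t *id_ref, int gtid,
                                               kmp_int8 *x, kmp_int8 inc) {
  __kmpc_atomic_1(id_ref, gtid, x, &inc, __kmp_sketch_add8); // x += inc
}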
3384 
3385 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3386  void (*f)(void *, void *, void *)) {
3387  if (
3388 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3389  FALSE /* must use lock */
3390 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3391  TRUE /* no alignment problems */
3392 #else
3393  !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3394 #endif
3395  ) {
3396  kmp_int16 old_value, new_value;
3397 
3398  old_value = *(kmp_int16 *)lhs;
3399  (*f)(&new_value, &old_value, rhs);
3400 
3401  /* TODO: Should this be acquire or release? */
3402  while (!KMP_COMPARE_AND_STORE_ACQ16(
3403  (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3404  KMP_CPU_PAUSE();
3405 
3406  old_value = *(kmp_int16 *)lhs;
3407  (*f)(&new_value, &old_value, rhs);
3408  }
3409 
3410  return;
3411  } else {
3412 // All 2-byte data is of integer data type.
3413 
3414 #ifdef KMP_GOMP_COMPAT
3415  if (__kmp_atomic_mode == 2) {
3416  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3417  } else
3418 #endif /* KMP_GOMP_COMPAT */
3419  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3420 
3421  (*f)(lhs, lhs, rhs);
3422 
3423 #ifdef KMP_GOMP_COMPAT
3424  if (__kmp_atomic_mode == 2) {
3425  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3426  } else
3427 #endif /* KMP_GOMP_COMPAT */
3428  __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3429  }
3430 }
3431 
3432 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3433  void (*f)(void *, void *, void *)) {
3434  KMP_DEBUG_ASSERT(__kmp_init_serial);
3435 
3436  if (
3437 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3438 // Gomp compatibility is broken if this routine is called for floats.
3439 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3440  TRUE /* no alignment problems */
3441 #else
3442  !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3443 #endif
3444  ) {
3445  kmp_int32 old_value, new_value;
3446 
3447  old_value = *(kmp_int32 *)lhs;
3448  (*f)(&new_value, &old_value, rhs);
3449 
3450  /* TODO: Should this be acquire or release? */
3451  while (!KMP_COMPARE_AND_STORE_ACQ32(
3452  (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3453  KMP_CPU_PAUSE();
3454 
3455  old_value = *(kmp_int32 *)lhs;
3456  (*f)(&new_value, &old_value, rhs);
3457  }
3458 
3459  return;
3460  } else {
3461 // Use __kmp_atomic_lock_4i for all 4-byte data,
3462 // even if it isn't of integer data type.
3463 
3464 #ifdef KMP_GOMP_COMPAT
3465  if (__kmp_atomic_mode == 2) {
3466  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3467  } else
3468 #endif /* KMP_GOMP_COMPAT */
3469  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3470 
3471  (*f)(lhs, lhs, rhs);
3472 
3473 #ifdef KMP_GOMP_COMPAT
3474  if (__kmp_atomic_mode == 2) {
3475  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3476  } else
3477 #endif /* KMP_GOMP_COMPAT */
3478  __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3479  }
3480 }
3481 
3482 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3483  void (*f)(void *, void *, void *)) {
3484  KMP_DEBUG_ASSERT(__kmp_init_serial);
3485  if (
3486 
3487 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3488  FALSE /* must use lock */
3489 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3490  TRUE /* no alignment problems */
3491 #else
3492  !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3493 #endif
3494  ) {
3495  kmp_int64 old_value, new_value;
3496 
3497  old_value = *(kmp_int64 *)lhs;
3498  (*f)(&new_value, &old_value, rhs);
3499  /* TODO: Should this be acquire or release? */
3500  while (!KMP_COMPARE_AND_STORE_ACQ64(
3501  (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3502  KMP_CPU_PAUSE();
3503 
3504  old_value = *(kmp_int64 *)lhs;
3505  (*f)(&new_value, &old_value, rhs);
3506  }
3507 
3508  return;
3509  } else {
3510 // Use __kmp_atomic_lock_8i for all 8-byte data,
3511 // even if it isn't of integer data type.
3512 
3513 #ifdef KMP_GOMP_COMPAT
3514  if (__kmp_atomic_mode == 2) {
3515  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3516  } else
3517 #endif /* KMP_GOMP_COMPAT */
3518  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3519 
3520  (*f)(lhs, lhs, rhs);
3521 
3522 #ifdef KMP_GOMP_COMPAT
3523  if (__kmp_atomic_mode == 2) {
3524  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3525  } else
3526 #endif /* KMP_GOMP_COMPAT */
3527  __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3528  }
3529 }
3530 
3531 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3532  void (*f)(void *, void *, void *)) {
3533  KMP_DEBUG_ASSERT(__kmp_init_serial);
3534 
3535 #ifdef KMP_GOMP_COMPAT
3536  if (__kmp_atomic_mode == 2) {
3537  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3538  } else
3539 #endif /* KMP_GOMP_COMPAT */
3540  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3541 
3542  (*f)(lhs, lhs, rhs);
3543 
3544 #ifdef KMP_GOMP_COMPAT
3545  if (__kmp_atomic_mode == 2) {
3546  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3547  } else
3548 #endif /* KMP_GOMP_COMPAT */
3549  __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3550 }
3551 
3552 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3553  void (*f)(void *, void *, void *)) {
3554  KMP_DEBUG_ASSERT(__kmp_init_serial);
3555 
3556 #ifdef KMP_GOMP_COMPAT
3557  if (__kmp_atomic_mode == 2) {
3558  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3559  } else
3560 #endif /* KMP_GOMP_COMPAT */
3561  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3562 
3563  (*f)(lhs, lhs, rhs);
3564 
3565 #ifdef KMP_GOMP_COMPAT
3566  if (__kmp_atomic_mode == 2) {
3567  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3568  } else
3569 #endif /* KMP_GOMP_COMPAT */
3570  __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3571 }
3572 
3573 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3574  void (*f)(void *, void *, void *)) {
3575  KMP_DEBUG_ASSERT(__kmp_init_serial);
3576 
3577 #ifdef KMP_GOMP_COMPAT
3578  if (__kmp_atomic_mode == 2) {
3579  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3580  } else
3581 #endif /* KMP_GOMP_COMPAT */
3582  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3583 
3584  (*f)(lhs, lhs, rhs);
3585 
3586 #ifdef KMP_GOMP_COMPAT
3587  if (__kmp_atomic_mode == 2) {
3588  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3589  } else
3590 #endif /* KMP_GOMP_COMPAT */
3591  __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3592 }
3593 
3594 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3595  void (*f)(void *, void *, void *)) {
3596  KMP_DEBUG_ASSERT(__kmp_init_serial);
3597 
3598 #ifdef KMP_GOMP_COMPAT
3599  if (__kmp_atomic_mode == 2) {
3600  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3601  } else
3602 #endif /* KMP_GOMP_COMPAT */
3603  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3604 
3605  (*f)(lhs, lhs, rhs);
3606 
3607 #ifdef KMP_GOMP_COMPAT
3608  if (__kmp_atomic_mode == 2) {
3609  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3610  } else
3611 #endif /* KMP_GOMP_COMPAT */
3612  __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3613 }
3614 
3615 // AC: same two routines as GOMP_atomic_start/end, but these will be called by
3616 // our compiler; duplicated to avoid using third-party names in pure Intel code
3617 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3618 void __kmpc_atomic_start(void) {
3619  int gtid = __kmp_entry_gtid();
3620  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3621  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3622 }
3623 
3624 void __kmpc_atomic_end(void) {
3625  int gtid = __kmp_get_gtid();
3626  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3627  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3628 }
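// Hedged usage sketch: a compiler lowering a generic atomic region can
// bracket the unguarded update with the pair above (illustration only):
static inline void __kmp_sketch_atomic_region(double *x, double v) {
  __kmpc_atomic_start(); // take the global atomic lock
  *x += v;               // arbitrary update, now protected by the lock
  __kmpc_atomic_end();   // release the global atomic lock
}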
3629 
3634 // end of file