LLVM OpenMP* Runtime Library
kmp_lock.h
1 /*
2  * kmp_lock.h -- lock header file
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #ifndef KMP_LOCK_H
17 #define KMP_LOCK_H
18 
19 #include <limits.h> // CHAR_BIT
20 #include <stddef.h> // offsetof
21 
22 #include "kmp_os.h"
23 #include "kmp_debug.h"
24 
25 #ifdef __cplusplus
26 #include <atomic>
27 
28 extern "C" {
29 #endif // __cplusplus
30 
31 // ----------------------------------------------------------------------------
32 // Have to copy these definitions from kmp.h because kmp.h cannot be included
33 // due to circular dependencies. Will undef these at end of file.
34 
35 #define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
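// KMP_PAD(type, sz) rounds sizeof(type) up to the next multiple of sz; e.g.
// for sizeof(type) == 10 and sz == 8 it yields 10 + (8 - ((10 - 1) % 8) - 1) == 16.
// It is used below to pad the lock unions out to a full cache line.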
36 #define KMP_GTID_DNE (-2)
37 
38 // Forward declaration of ident and ident_t
39 
40 struct ident;
41 typedef struct ident ident_t;
42 
43 // End of copied code.
44 // ----------------------------------------------------------------------------
45 
46 //
47 // We need to know the size of the area that we can assume the compiler(s)
48 // allocated for objects of type omp_lock_t and omp_nest_lock_t. The Intel
49 // compiler always allocates a pointer-sized area, as does Visual Studio.
50 //
51 // gcc, however, only allocates 4 bytes for regular locks, even on 64-bit
52 // Intel architectures. It allocates at least 8 bytes for a nested lock (more on
53 // recent versions), but we are bounded by the pointer-sized chunks that
54 // the Intel compiler allocates.
55 //
56 
57 #if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT)
58 # define OMP_LOCK_T_SIZE sizeof(int)
59 # define OMP_NEST_LOCK_T_SIZE sizeof(void *)
60 #else
61 # define OMP_LOCK_T_SIZE sizeof(void *)
62 # define OMP_NEST_LOCK_T_SIZE sizeof(void *)
63 #endif
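// These sizes are compared against sizeof(<lock type>) throughout this file to
// decide whether a user lock fits directly in the omp_lock_t object or must be
// reached through a pointer or a lock-table index (see the user lock table
// section below).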
64 
65 //
66 // The Intel compiler allocates a 32-byte chunk for a critical section.
67 // Both gcc and Visual Studio only allocate enough space for a pointer.
68 // Sometimes we know that the space was allocated by the Intel compiler.
69 //
70 #define OMP_CRITICAL_SIZE sizeof(void *)
71 #define INTEL_CRITICAL_SIZE 32
72 
73 //
74 // lock flags
75 //
76 typedef kmp_uint32 kmp_lock_flags_t;
77 
78 #define kmp_lf_critical_section 1
79 
80 //
81 // When a lock table is used, the indices are of kmp_lock_index_t
82 //
83 typedef kmp_uint32 kmp_lock_index_t;
84 
85 //
86 // When the memory allocated for locks is on the lock pool (free list),
87 // it is treated as structs of this type.
88 //
89 struct kmp_lock_pool {
90  union kmp_user_lock *next;
91  kmp_lock_index_t index;
92 };
93 
94 typedef struct kmp_lock_pool kmp_lock_pool_t;
95 
96 
97 extern void __kmp_validate_locks( void );
98 
99 
100 // ----------------------------------------------------------------------------
101 //
102 // There are 5 lock implementations:
103 //
104 // 1. Test and set locks.
105 // 2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core architecture)
106 // 3. Ticket (Lamport bakery) locks.
107 // 4. Queuing locks (with separate spin fields).
108 // 5. DRPA (Dynamically Reconfigurable Distributed Polling Area) locks
109 //
110 // and 3 lock purposes:
111 //
112 // 1. Bootstrap locks -- Used for a few locks available at library startup-shutdown time.
113 // These do not require non-negative global thread IDs.
114 // 2. Internal RTL locks -- Used everywhere else in the RTL
115 // 3. User locks (includes critical sections)
116 //
117 // ----------------------------------------------------------------------------
118 
119 
120 // ============================================================================
121 // Lock implementations.
122 // ============================================================================
123 
124 
125 // ----------------------------------------------------------------------------
126 // Test and set locks.
127 //
128 // Non-nested test and set locks differ from the other lock kinds (except
129 // futex) in that we use the memory allocated by the compiler for the lock,
130 // rather than a pointer to it.
131 //
132 // On lin32, lin_32e, and win_32, the space allocated may be as small as 4
133 // bytes, so we have to use a lock table for nested locks, and avoid accessing
134 // the depth_locked field for non-nested locks.
135 //
136 // Information normally available to the tools, such as lock location,
137 // lock usage (normal lock vs. critical section), etc. is not available with
138 // test and set locks.
139 // ----------------------------------------------------------------------------
140 
141 struct kmp_base_tas_lock {
142  volatile kmp_int32 poll; // 0 => unlocked
143  // locked: (gtid+1) of owning thread
144  kmp_int32 depth_locked; // depth locked, for nested locks only
145 };
146 
147 typedef struct kmp_base_tas_lock kmp_base_tas_lock_t;
148 
149 union kmp_tas_lock {
150  kmp_base_tas_lock_t lk;
151  kmp_lock_pool_t pool; // make certain struct is large enough
152  double lk_align; // use worst case alignment
153  // no cache line padding
154 };
155 
156 typedef union kmp_tas_lock kmp_tas_lock_t;
157 
158 //
159 // Static initializer for test and set lock variables. Usage:
160 // kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock );
161 //
162 #define KMP_TAS_LOCK_INITIALIZER( lock ) { { 0, 0 } }
163 
164 extern int __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
165 extern int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
166 extern int __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
167 extern void __kmp_init_tas_lock( kmp_tas_lock_t *lck );
168 extern void __kmp_destroy_tas_lock( kmp_tas_lock_t *lck );
169 
170 extern int __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
171 extern int __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
172 extern int __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid );
173 extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck );
174 extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck );
175 
176 #define KMP_LOCK_RELEASED 1
177 #define KMP_LOCK_STILL_HELD 0
178 #define KMP_LOCK_ACQUIRED_FIRST 1
179 #define KMP_LOCK_ACQUIRED_NEXT 0
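// Illustrative sketch of the non-nested TAS API declared above (not taken from
// the runtime sources; `gtid' is assumed to be a valid global thread id):
//
//   kmp_tas_lock_t lck;
//   __kmp_init_tas_lock( &lck );
//   if ( __kmp_test_tas_lock( &lck, gtid ) ) {   // non-blocking attempt
//       /* ... critical work ... */
//       __kmp_release_tas_lock( &lck, gtid );    // returns KMP_LOCK_RELEASED
//   }
//   __kmp_destroy_tas_lock( &lck );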
180 
181 #define KMP_USE_FUTEX (KMP_OS_LINUX && !KMP_OS_CNK && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64))
182 
183 #if KMP_USE_FUTEX
184 
185 // ----------------------------------------------------------------------------
186 // futex locks. futex locks are only available on Linux* OS.
187 //
188 // Like non-nested test and set lock, non-nested futex locks use the memory
189 // allocated by the compiler for the lock, rather than a pointer to it.
190 //
191 // Information normally available to the tools, such as lock location,
192 // lock usage (normal lock vs. critical section), etc. is not available with
193 // test and set locks. With non-nested futex locks, the lock owner is not
194 // even available.
195 // ----------------------------------------------------------------------------
196 
197 struct kmp_base_futex_lock {
198  volatile kmp_int32 poll; // 0 => unlocked
199  // locked: 2*(gtid+1) of owning thread
200  // (cf. the TAS lock above, which stores gtid+1 directly)
201  kmp_int32 depth_locked; // depth locked, for nested locks only
202 };
203 
204 typedef struct kmp_base_futex_lock kmp_base_futex_lock_t;
205 
206 union kmp_futex_lock {
207  kmp_base_futex_lock_t lk;
208  kmp_lock_pool_t pool; // make certain struct is large enough
209  double lk_align; // use worst case alignment
210  // no cache line padding
211 };
212 
213 typedef union kmp_futex_lock kmp_futex_lock_t;
214 
215 //
216 // Static initializer for futex lock variables. Usage:
217 // kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock );
218 //
219 #define KMP_FUTEX_LOCK_INITIALIZER( lock ) { { 0, 0 } }
220 
221 extern int __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
222 extern int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
223 extern int __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
224 extern void __kmp_init_futex_lock( kmp_futex_lock_t *lck );
225 extern void __kmp_destroy_futex_lock( kmp_futex_lock_t *lck );
226 
227 extern int __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
228 extern int __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
229 extern int __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
230 extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck );
231 extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck );
232 
233 #endif // KMP_USE_FUTEX
234 
235 
236 // ----------------------------------------------------------------------------
237 // Ticket locks.
238 // ----------------------------------------------------------------------------
239 
240 #ifdef __cplusplus
241 
242 #ifdef _MSC_VER
243 // MSVC won't allow use of std::atomic<> in a union since it has a non-trivial copy constructor.
244 
245 struct kmp_base_ticket_lock {
246  // `initialized' must be the first entry in the lock data structure!
247  std::atomic_bool initialized;
248  volatile union kmp_ticket_lock *self; // points to the lock union
249  ident_t const * location; // Source code location of omp_init_lock().
250  std::atomic_uint next_ticket; // ticket number to give to next thread which acquires
251  std::atomic_uint now_serving; // ticket number for thread which holds the lock
252  std::atomic_int owner_id; // (gtid+1) of owning thread, 0 if unlocked
253  std::atomic_int depth_locked; // depth locked, for nested locks only
254  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
255 };
256 #else
257 struct kmp_base_ticket_lock {
258  // `initialized' must be the first entry in the lock data structure!
259  std::atomic<bool> initialized;
260  volatile union kmp_ticket_lock *self; // points to the lock union
261  ident_t const * location; // Source code location of omp_init_lock().
262  std::atomic<unsigned> next_ticket; // ticket number to give to next thread which acquires
263  std::atomic<unsigned> now_serving; // ticket number for thread which holds the lock
264  std::atomic<int> owner_id; // (gtid+1) of owning thread, 0 if unlocked
265  std::atomic<int> depth_locked; // depth locked, for nested locks only
266  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
267 };
268 #endif
269 
270 #else // __cplusplus
271 
272 struct kmp_base_ticket_lock;
273 
274 #endif // !__cplusplus
275 
276 typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;
277 
278 union KMP_ALIGN_CACHE kmp_ticket_lock {
279  kmp_base_ticket_lock_t lk; // This field must be first to allow static initializing.
280  kmp_lock_pool_t pool;
281  double lk_align; // use worst case alignment
282  char lk_pad[ KMP_PAD( kmp_base_ticket_lock_t, CACHE_LINE ) ];
283 };
284 
285 typedef union kmp_ticket_lock kmp_ticket_lock_t;
286 
287 //
288 // Static initializer for simple ticket lock variables. Usage:
289 // kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock );
290 // Note the macro argument: it is needed so that the `self' field points at the lock variable itself.
291 //
292 #define KMP_TICKET_LOCK_INITIALIZER( lock ) { { ATOMIC_VAR_INIT(true), \
293  &(lock), \
294  NULL, \
295  ATOMIC_VAR_INIT(0U), \
296  ATOMIC_VAR_INIT(0U), \
297  ATOMIC_VAR_INIT(0), \
298  ATOMIC_VAR_INIT(-1) } }
299 
300 extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
301 extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
302 extern int __kmp_test_ticket_lock_with_cheks( kmp_ticket_lock_t *lck, kmp_int32 gtid );
303 extern int __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
304 extern void __kmp_init_ticket_lock( kmp_ticket_lock_t *lck );
305 extern void __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck );
306 
307 extern int __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
308 extern int __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
309 extern int __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid );
310 extern void __kmp_init_nested_ticket_lock( kmp_ticket_lock_t *lck );
311 extern void __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck );
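// Illustrative sketch of the nested variant declared above (not taken from the
// runtime sources; as before, `gtid' is assumed to be a valid global thread id).
// The return values show how the nested acquire/release constants defined
// earlier are expected to be used:
//
//   kmp_ticket_lock_t lck;
//   __kmp_init_nested_ticket_lock( &lck );
//   __kmp_acquire_nested_ticket_lock( &lck, gtid );   // KMP_LOCK_ACQUIRED_FIRST
//   __kmp_acquire_nested_ticket_lock( &lck, gtid );   // KMP_LOCK_ACQUIRED_NEXT (same owner)
//   __kmp_release_nested_ticket_lock( &lck, gtid );   // KMP_LOCK_STILL_HELD
//   __kmp_release_nested_ticket_lock( &lck, gtid );   // KMP_LOCK_RELEASED
//   __kmp_destroy_nested_ticket_lock( &lck );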
312 
313 
314 // ----------------------------------------------------------------------------
315 // Queuing locks.
316 // ----------------------------------------------------------------------------
317 
318 #if KMP_USE_ADAPTIVE_LOCKS
319 
320 struct kmp_adaptive_lock_info;
321 
322 typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t;
323 
324 #if KMP_DEBUG_ADAPTIVE_LOCKS
325 
326 struct kmp_adaptive_lock_statistics {
327  /* So we can get stats from locks that haven't been destroyed. */
328  kmp_adaptive_lock_info_t * next;
329  kmp_adaptive_lock_info_t * prev;
330 
331  /* Other statistics */
332  kmp_uint32 successfulSpeculations;
333  kmp_uint32 hardFailedSpeculations;
334  kmp_uint32 softFailedSpeculations;
335  kmp_uint32 nonSpeculativeAcquires;
336  kmp_uint32 nonSpeculativeAcquireAttempts;
337  kmp_uint32 lemmingYields;
338 };
339 
340 typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t;
341 
342 extern void __kmp_print_speculative_stats();
343 extern void __kmp_init_speculative_stats();
344 
345 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
346 
347 struct kmp_adaptive_lock_info
348 {
349  /* Values used for adaptivity.
350  * Although these are accessed from multiple threads, we don't access them atomically,
351  * because if we miss updates it probably doesn't matter much. (It just affects our
352  * decision about whether to try speculation on the lock.)
353  */
354  kmp_uint32 volatile badness;
355  kmp_uint32 volatile acquire_attempts;
356  /* Parameters of the lock. */
357  kmp_uint32 max_badness;
358  kmp_uint32 max_soft_retries;
359 
360 #if KMP_DEBUG_ADAPTIVE_LOCKS
361  kmp_adaptive_lock_statistics_t volatile stats;
362 #endif
363 };
364 
365 #endif // KMP_USE_ADAPTIVE_LOCKS
366 
367 
368 struct kmp_base_queuing_lock {
369 
370  // `initialized' must be the first entry in the lock data structure!
371  volatile union kmp_queuing_lock *initialized; // Points to the lock union if in initialized state.
372 
373  ident_t const * location; // Source code location of omp_init_lock().
374 
375  KMP_ALIGN( 8 ) // tail_id must be 8-byte aligned!
376 
377  volatile kmp_int32 tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty
378  // There must be no padding here, since head/tail are used in an 8-byte CAS
379  volatile kmp_int32 head_id; // (gtid+1) of thread at head of wait queue, 0 if empty
380  // Decl order assumes little endian
381  // bakery-style lock
382  volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires
383  volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock
384  volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
385  kmp_int32 depth_locked; // depth locked, for nested locks only
386 
387  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
388 };
389 
390 typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t;
391 
392 KMP_BUILD_ASSERT( offsetof( kmp_base_queuing_lock_t, tail_id ) % 8 == 0 );
393 
394 union KMP_ALIGN_CACHE kmp_queuing_lock {
395  kmp_base_queuing_lock_t lk; // This field must be first to allow static initializing.
396  kmp_lock_pool_t pool;
397  double lk_align; // use worst case alignment
398  char lk_pad[ KMP_PAD( kmp_base_queuing_lock_t, CACHE_LINE ) ];
399 };
400 
401 typedef union kmp_queuing_lock kmp_queuing_lock_t;
402 
403 extern int __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
404 extern int __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
405 extern int __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
406 extern void __kmp_init_queuing_lock( kmp_queuing_lock_t *lck );
407 extern void __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck );
408 
409 extern int __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
410 extern int __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
411 extern int __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid );
412 extern void __kmp_init_nested_queuing_lock( kmp_queuing_lock_t *lck );
413 extern void __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck );
414 
415 #if KMP_USE_ADAPTIVE_LOCKS
416 
417 // ----------------------------------------------------------------------------
418 // Adaptive locks.
419 // ----------------------------------------------------------------------------
420 struct kmp_base_adaptive_lock {
421  kmp_base_queuing_lock qlk;
422  KMP_ALIGN(CACHE_LINE)
423  kmp_adaptive_lock_info_t adaptive; // Information for the speculative adaptive lock
424 };
425 
426 typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t;
427 
428 union KMP_ALIGN_CACHE kmp_adaptive_lock {
429  kmp_base_adaptive_lock_t lk;
430  kmp_lock_pool_t pool;
431  double lk_align;
432  char lk_pad[ KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE) ];
433 };
434 typedef union kmp_adaptive_lock kmp_adaptive_lock_t;
435 
436 # define GET_QLK_PTR(l) ((kmp_queuing_lock_t *) & (l)->lk.qlk)
437 
438 #endif // KMP_USE_ADAPTIVE_LOCKS
439 
440 // ----------------------------------------------------------------------------
441 // DRDPA ticket locks.
442 // ----------------------------------------------------------------------------
443 
444 struct kmp_base_drdpa_lock {
445  //
446  // All of the fields on the first cache line are only written when
447  // initializing or reconfiguring the lock. These are relatively rare
448  // operations, so data from the first cache line will usually stay
449  // resident in the cache of each thread trying to acquire the lock.
450  //
451  // initialized must be the first entry in the lock data structure!
452  //
453  KMP_ALIGN_CACHE
454 
455  volatile union kmp_drdpa_lock * initialized; // points to the lock union if in initialized state
456  ident_t const * location; // Source code location of omp_init_lock().
457  volatile struct kmp_lock_poll {
458  kmp_uint64 poll;
459  } * volatile polls;
460  volatile kmp_uint64 mask; // is 2**num_polls-1 for mod op
461  kmp_uint64 cleanup_ticket; // thread with cleanup ticket
462  volatile struct kmp_lock_poll * old_polls; // will deallocate old_polls
463  kmp_uint32 num_polls; // must be power of 2
464 
465  //
466  // next_ticket needs to exist on a separate cache line, as it is
467  // invalidated every time a thread takes a new ticket.
468  //
469  KMP_ALIGN_CACHE
470 
471  volatile kmp_uint64 next_ticket;
472 
473  //
474  // now_serving is used to store our ticket value while we hold the lock.
475  // It has a slightly different meaning in the DRDPA ticket locks (where
476  // it is written by the acquiring thread) than it does in the simple
477  // ticket locks (where it is written by the releasing thread).
478  //
479  // Since now_serving is only read and written in the critical section,
480  // it is non-volatile, but it needs to exist on a separate cache line,
481  // as it is invalidated at every lock acquire.
482  //
483  // Likewise, the vars used for nested locks (owner_id and depth_locked)
484  // are only written by the thread owning the lock, so they are put in
485  // this cache line. owner_id is read by other threads, so it must be
486  // declared volatile.
487  //
488  KMP_ALIGN_CACHE
489 
490  kmp_uint64 now_serving; // doesn't have to be volatile
491  volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
492  kmp_int32 depth_locked; // depth locked
493  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
494 };
495 
496 typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;
497 
498 union KMP_ALIGN_CACHE kmp_drdpa_lock {
499  kmp_base_drdpa_lock_t lk; // This field must be first to allow static initializing.
500  kmp_lock_pool_t pool;
501  double lk_align; // use worst case alignment
502  char lk_pad[ KMP_PAD( kmp_base_drdpa_lock_t, CACHE_LINE ) ];
503 };
504 
505 typedef union kmp_drdpa_lock kmp_drdpa_lock_t;
506 
507 extern int __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
508 extern int __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
509 extern int __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
510 extern void __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck );
511 extern void __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck );
512 
513 extern int __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
514 extern int __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
515 extern int __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
516 extern void __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t *lck );
517 extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck );
518 
519 
520 // ============================================================================
521 // Lock purposes.
522 // ============================================================================
523 
524 
525 // ----------------------------------------------------------------------------
526 // Bootstrap locks.
527 // ----------------------------------------------------------------------------
528 
529 // Bootstrap locks -- very few locks used at library initialization time.
530 // Bootstrap locks are currently implemented as ticket locks.
531 // They could also be implemented as test and set locks, but cannot be
532 // implemented with the other lock kinds, as those require gtids, which are not
533 // available at initialization time.
534 
535 typedef kmp_ticket_lock_t kmp_bootstrap_lock_t;
536 
537 #define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) )
538 
539 static inline int
540 __kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck )
541 {
542  return __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE );
543 }
544 
545 static inline int
546 __kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck )
547 {
548  return __kmp_test_ticket_lock( lck, KMP_GTID_DNE );
549 }
550 
551 static inline void
552 __kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck )
553 {
554  __kmp_release_ticket_lock( lck, KMP_GTID_DNE );
555 }
556 
557 static inline void
558 __kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck )
559 {
560  __kmp_init_ticket_lock( lck );
561 }
562 
563 static inline void
564 __kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck )
565 {
566  __kmp_destroy_ticket_lock( lck );
567 }
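// Illustrative sketch of the typical bootstrap-lock pattern (not taken from the
// runtime sources; the lock and flag names here are hypothetical). Compare
// KMP_CHECK_USER_LOCK_INIT() later in this file, which uses the same shape
// with __kmp_initz_lock:
//
//   static kmp_bootstrap_lock_t my_init_lock =
//       KMP_BOOTSTRAP_LOCK_INITIALIZER( my_init_lock );
//
//   __kmp_acquire_bootstrap_lock( &my_init_lock );
//   if ( ! my_initialized ) { /* one-time setup */ my_initialized = TRUE; }
//   __kmp_release_bootstrap_lock( &my_init_lock );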
568 
569 
570 // ----------------------------------------------------------------------------
571 // Internal RTL locks.
572 // ----------------------------------------------------------------------------
573 
574 //
575 // Internal RTL locks are also implemented as ticket locks, for now.
576 //
577 // FIXME - We should go through and figure out which lock kind works best for
578 // each internal lock, and use the type declaration and function calls for
579 // that explicit lock kind (and get rid of this section).
580 //
581 
582 typedef kmp_ticket_lock_t kmp_lock_t;
583 
584 static inline int
585 __kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid )
586 {
587  return __kmp_acquire_ticket_lock( lck, gtid );
588 }
589 
590 static inline int
591 __kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid )
592 {
593  return __kmp_test_ticket_lock( lck, gtid );
594 }
595 
596 static inline void
597 __kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid )
598 {
599  __kmp_release_ticket_lock( lck, gtid );
600 }
601 
602 static inline void
603 __kmp_init_lock( kmp_lock_t *lck )
604 {
605  __kmp_init_ticket_lock( lck );
606 }
607 
608 static inline void
609 __kmp_destroy_lock( kmp_lock_t *lck )
610 {
611  __kmp_destroy_ticket_lock( lck );
612 }
613 
614 
615 // ----------------------------------------------------------------------------
616 // User locks.
617 // ----------------------------------------------------------------------------
618 
619 //
620 // Do not allocate objects of type union kmp_user_lock!!!
621 // This will waste space unless __kmp_user_lock_kind == lk_drdpa.
622 // Instead, check the value of __kmp_user_lock_kind and allocate objects of
623 // the type of the appropriate union member, and cast their addresses to
624 // kmp_user_lock_p.
625 //
626 
627 enum kmp_lock_kind {
628  lk_default = 0,
629  lk_tas,
630 #if KMP_USE_FUTEX
631  lk_futex,
632 #endif
633 #if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX
634  lk_hle,
635  lk_rtm,
636 #endif
637  lk_ticket,
638  lk_queuing,
639  lk_drdpa,
640 #if KMP_USE_ADAPTIVE_LOCKS
641  lk_adaptive
642 #endif // KMP_USE_ADAPTIVE_LOCKS
643 };
644 
645 typedef enum kmp_lock_kind kmp_lock_kind_t;
646 
647 extern kmp_lock_kind_t __kmp_user_lock_kind;
648 
649 union kmp_user_lock {
650  kmp_tas_lock_t tas;
651 #if KMP_USE_FUTEX
652  kmp_futex_lock_t futex;
653 #endif
654  kmp_ticket_lock_t ticket;
655  kmp_queuing_lock_t queuing;
656  kmp_drdpa_lock_t drdpa;
657 #if KMP_USE_ADAPTIVE_LOCKS
658  kmp_adaptive_lock_t adaptive;
659 #endif // KMP_USE_ADAPTIVE_LOCKS
660  kmp_lock_pool_t pool;
661 };
662 
663 typedef union kmp_user_lock *kmp_user_lock_p;
664 
665 #if ! KMP_USE_DYNAMIC_LOCK
666 
667 extern size_t __kmp_base_user_lock_size;
668 extern size_t __kmp_user_lock_size;
669 
670 extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck );
671 
672 static inline kmp_int32
673 __kmp_get_user_lock_owner( kmp_user_lock_p lck )
674 {
675  KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL );
676  return ( *__kmp_get_user_lock_owner_ )( lck );
677 }
678 
679 extern int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
680 
681 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
682 
683 #define __kmp_acquire_user_lock_with_checks(lck,gtid) \
684  if (__kmp_user_lock_kind == lk_tas) { \
685  if ( __kmp_env_consistency_check ) { \
686  char const * const func = "omp_set_lock"; \
687  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) \
688  && lck->tas.lk.depth_locked != -1 ) { \
689  KMP_FATAL( LockNestableUsedAsSimple, func ); \
690  } \
691  if ( ( gtid >= 0 ) && ( lck->tas.lk.poll - 1 == gtid ) ) { \
692  KMP_FATAL( LockIsAlreadyOwned, func ); \
693  } \
694  } \
695  if ( ( lck->tas.lk.poll != 0 ) || \
696  ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
697  kmp_uint32 spins; \
698  KMP_FSYNC_PREPARE( lck ); \
699  KMP_INIT_YIELD( spins ); \
700  if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
701  KMP_YIELD( TRUE ); \
702  } else { \
703  KMP_YIELD_SPIN( spins ); \
704  } \
705  while ( ( lck->tas.lk.poll != 0 ) || \
706  ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
707  if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
708  KMP_YIELD( TRUE ); \
709  } else { \
710  KMP_YIELD_SPIN( spins ); \
711  } \
712  } \
713  } \
714  KMP_FSYNC_ACQUIRED( lck ); \
715  } else { \
716  KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); \
717  ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); \
718  }
719 
720 #else
721 static inline int
722 __kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
723 {
724  KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL );
725  return ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid );
726 }
727 #endif
728 
729 extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
730 
731 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
732 
733 #include "kmp_i18n.h" /* AC: KMP_FATAL definition */
734 extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */
735 static inline int
736 __kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
737 {
738  if ( __kmp_user_lock_kind == lk_tas ) {
739  if ( __kmp_env_consistency_check ) {
740  char const * const func = "omp_test_lock";
741  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
742  && lck->tas.lk.depth_locked != -1 ) {
743  KMP_FATAL( LockNestableUsedAsSimple, func );
744  }
745  }
746  return ( ( lck->tas.lk.poll == 0 ) &&
747  KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) );
748  } else {
749  KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
750  return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid );
751  }
752 }
753 #else
754 static inline int
755 __kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
756 {
757  KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL );
758  return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid );
759 }
760 #endif
761 
762 extern int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
763 
764 static inline void
765 __kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
766 {
767  KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL );
768  ( *__kmp_release_user_lock_with_checks_ ) ( lck, gtid );
769 }
770 
771 extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck );
772 
773 static inline void
774 __kmp_init_user_lock_with_checks( kmp_user_lock_p lck )
775 {
776  KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL );
777  ( *__kmp_init_user_lock_with_checks_ )( lck );
778 }
779 
780 //
781 // We need a non-checking version of the destroy-lock function for when the RTL is
782 // doing the cleanup, as it can't always tell whether the lock is nested or not.
783 //
784 extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck );
785 
786 static inline void
787 __kmp_destroy_user_lock( kmp_user_lock_p lck )
788 {
789  KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL );
790  ( *__kmp_destroy_user_lock_ )( lck );
791 }
792 
793 extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck );
794 
795 static inline void
796 __kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck )
797 {
798  KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL );
799  ( *__kmp_destroy_user_lock_with_checks_ )( lck );
800 }
801 
802 extern int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
803 
804 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
805 
806 #define __kmp_acquire_nested_user_lock_with_checks(lck,gtid,depth) \
807  if (__kmp_user_lock_kind == lk_tas) { \
808  if ( __kmp_env_consistency_check ) { \
809  char const * const func = "omp_set_nest_lock"; \
810  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) \
811  && lck->tas.lk.depth_locked == -1 ) { \
812  KMP_FATAL( LockSimpleUsedAsNestable, func ); \
813  } \
814  } \
815  if ( lck->tas.lk.poll - 1 == gtid ) { \
816  lck->tas.lk.depth_locked += 1; \
817  *depth = KMP_LOCK_ACQUIRED_NEXT; \
818  } else { \
819  if ( ( lck->tas.lk.poll != 0 ) || \
820  ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
821  kmp_uint32 spins; \
822  KMP_FSYNC_PREPARE( lck ); \
823  KMP_INIT_YIELD( spins ); \
824  if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
825  KMP_YIELD( TRUE ); \
826  } else { \
827  KMP_YIELD_SPIN( spins ); \
828  } \
829  while ( ( lck->tas.lk.poll != 0 ) || \
830  ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \
831  if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \
832  KMP_YIELD( TRUE ); \
833  } else { \
834  KMP_YIELD_SPIN( spins ); \
835  } \
836  } \
837  } \
838  lck->tas.lk.depth_locked = 1; \
839  *depth = KMP_LOCK_ACQUIRED_FIRST; \
840  } \
841  KMP_FSYNC_ACQUIRED( lck ); \
842  } else { \
843  KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); \
844  *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); \
845  }
846 
847 #else
848 static inline void
849 __kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid, int* depth )
850 {
851  KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL );
852  *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid );
853 }
854 #endif
855 
856 extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
857 
858 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
859 static inline int
860 __kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
861 {
862  if ( __kmp_user_lock_kind == lk_tas ) {
863  int retval;
864  if ( __kmp_env_consistency_check ) {
865  char const * const func = "omp_test_nest_lock";
866  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE )
867  && lck->tas.lk.depth_locked == -1 ) {
868  KMP_FATAL( LockSimpleUsedAsNestable, func );
869  }
870  }
871  KMP_DEBUG_ASSERT( gtid >= 0 );
872  if ( lck->tas.lk.poll - 1 == gtid ) { /* __kmp_get_tas_lock_owner( lck ) == gtid */
873  return ++lck->tas.lk.depth_locked; /* same owner, depth increased */
874  }
875  retval = ( ( lck->tas.lk.poll == 0 ) &&
876  KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) );
877  if ( retval ) {
878  KMP_MB();
879  lck->tas.lk.depth_locked = 1;
880  }
881  return retval;
882  } else {
883  KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
884  return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid );
885  }
886 }
887 #else
888 static inline int
889 __kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
890 {
891  KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL );
892  return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid );
893 }
894 #endif
895 
896 extern int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid );
897 
898 static inline int
899 __kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid )
900 {
901  KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL );
902  return ( *__kmp_release_nested_user_lock_with_checks_ )( lck, gtid );
903 }
904 
905 extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );
906 
907 static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck )
908 {
909  KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL );
910  ( *__kmp_init_nested_user_lock_with_checks_ )( lck );
911 }
912 
913 extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck );
914 
915 static inline void
916 __kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck )
917 {
918  KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL );
919  ( *__kmp_destroy_nested_user_lock_with_checks_ )( lck );
920 }
921 
922 //
923 // user lock functions which do not necessarily exist for all lock kinds.
924 //
925 // The "set" functions usually have wrapper routines that check for a NULL set
926 // function pointer and call it if non-NULL.
927 //
928 // In some cases, it makes sense to have a "get" wrapper function check for a
929 // NULL get function pointer and return NULL / invalid value / error code if
930 // the function pointer is NULL.
931 //
932 // In other cases, the calling code really should differentiate between an
933 // unimplemented function and one that is implemented but returning NULL /
934 // invalid value. If this is the case, no get function wrapper exists.
935 //
936 
937 extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck );
938 
939 // no set function; fields are set during local allocation
940 
941 extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck );
942 
943 static inline const ident_t *
944 __kmp_get_user_lock_location( kmp_user_lock_p lck )
945 {
946  if ( __kmp_get_user_lock_location_ != NULL ) {
947  return ( *__kmp_get_user_lock_location_ )( lck );
948  }
949  else {
950  return NULL;
951  }
952 }
953 
954 extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc );
955 
956 static inline void
957 __kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc )
958 {
959  if ( __kmp_set_user_lock_location_ != NULL ) {
960  ( *__kmp_set_user_lock_location_ )( lck, loc );
961  }
962 }
963 
964 extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck );
965 
966 extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags );
967 
968 static inline void
969 __kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags )
970 {
971  if ( __kmp_set_user_lock_flags_ != NULL ) {
972  ( *__kmp_set_user_lock_flags_ )( lck, flags );
973  }
974 }
975 
976 //
977 // The function which sets up all of the vtbl pointers for kmp_user_lock_t.
978 //
979 extern void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind );
980 
981 //
982 // Macros for binding user lock functions.
983 //
984 #define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) { \
985  __kmp_acquire##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
986  __kmp_acquire##nest##kind##_##suffix; \
987  __kmp_release##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
988  __kmp_release##nest##kind##_##suffix; \
989  __kmp_test##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
990  __kmp_test##nest##kind##_##suffix; \
991  __kmp_init##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \
992  __kmp_init##nest##kind##_##suffix; \
993  __kmp_destroy##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \
994  __kmp_destroy##nest##kind##_##suffix; \
995 }
996 
997 #define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
998 #define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
999 #define KMP_BIND_NESTED_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
1000 #define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)
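// For reference, KMP_BIND_USER_LOCK(ticket) instantiates the template with
// nest == `_', kind == `ticket', suffix == `lock', producing assignments such as
//   __kmp_acquire_user_lock_with_checks_ =
//       ( int (*)( kmp_user_lock_p, kmp_int32 ) ) __kmp_acquire_ticket_lock;
// and likewise for the release/test/init/destroy pointers.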
1001 
1002 // ----------------------------------------------------------------------------
1003 // User lock table & lock allocation
1004 // ----------------------------------------------------------------------------
1005 
1006 /*
1007  On 64-bit Linux* OS (and OS X*) the GNU compiler allocates only 4 bytes of memory for a lock
1008  variable, which is not enough to store a pointer, so we have to use lock indexes instead of pointers
1009  and maintain a lock table to map indexes to pointers.
1010 
1011 
1012  Note: The first element of the table is not a pointer to a lock! It is a pointer to the previously
1013  allocated table (or NULL if it is the first table).
1014 
1015  Usage:
1016 
1017  if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
1018  The lock table is fully utilized. User locks are indexes, so the table is
1019  used on every user lock operation.
1020  Note: it may be the case (lin_32) that we don't need to use a lock
1021  table for regular locks, but do need the table for nested locks.
1022  }
1023  else {
1024  Lock table initialized but not actually used.
1025  }
1026 */
1027 
1028 struct kmp_lock_table {
1029  kmp_lock_index_t used; // Number of used elements
1030  kmp_lock_index_t allocated; // Number of allocated elements
1031  kmp_user_lock_p * table; // Lock table.
1032 };
1033 
1034 typedef struct kmp_lock_table kmp_lock_table_t;
1035 
1036 extern kmp_lock_table_t __kmp_user_lock_table;
1037 extern kmp_user_lock_p __kmp_lock_pool;
1038 
1039 struct kmp_block_of_locks {
1040  struct kmp_block_of_locks * next_block;
1041  void * locks;
1042 };
1043 
1044 typedef struct kmp_block_of_locks kmp_block_of_locks_t;
1045 
1046 extern kmp_block_of_locks_t *__kmp_lock_blocks;
1047 extern int __kmp_num_locks_in_block;
1048 
1049 extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags );
1050 extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck );
1051 extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func );
1052 extern void __kmp_cleanup_user_locks();
1053 
1054 #define KMP_CHECK_USER_LOCK_INIT() \
1055  { \
1056  if ( ! TCR_4( __kmp_init_user_locks ) ) { \
1057  __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); \
1058  if ( ! TCR_4( __kmp_init_user_locks ) ) { \
1059  TCW_4( __kmp_init_user_locks, TRUE ); \
1060  } \
1061  __kmp_release_bootstrap_lock( &__kmp_initz_lock ); \
1062  } \
1063  }
1064 
1065 #endif // KMP_USE_DYNAMIC_LOCK
1066 
1067 #undef KMP_PAD
1068 #undef KMP_GTID_DNE
1069 
1070 #if KMP_USE_DYNAMIC_LOCK
1071 
1072 //
1073 // KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without breaking
1074 // compatibility. The essential functionality of this new code is dynamic dispatch, but it also
1075 // implements (or enables implementation of) hinted user locks and critical sections, which will be
1076 // part of OMP 4.5 soon.
1077 //
1078 // The lock type can be decided at creation time (i.e., lock initialization), and each subsequent lock
1079 // function call on the created lock object requires type extraction and a call through a jump table
1080 // using the extracted type. This type information is stored in two different ways depending on
1081 // the size of the lock object, and we differentiate lock types by this size requirement - direct
1082 // and indirect locks.
1083 //
1084 // Direct locks:
1085 // A direct lock object fits into the space created by the compiler for an omp_lock_t object, and
1086 // the TAS/Futex locks fall into this category. We use the low byte of the lock object as the storage
1087 // for the lock type, and appropriate bit operations are required to access the data meaningful to
1088 // the lock algorithms. Also, to differentiate a direct lock from an indirect lock, 1 is written to the
1089 // LSB of the lock object. The newly introduced "hle" lock is also a direct lock.
1090 //
1091 // Indirect locks:
1092 // An indirect lock object requires more space than the compiler-generated space, and it must be
1093 // allocated from the heap. Depending on the size of the compiler-generated space for the lock (i.e.,
1094 // the size of omp_lock_t), the omp_lock_t object stores either the address of the heap-allocated
1095 // indirect lock (if void * fits in the object) or an index to the indirect lock table entry that
1096 // holds the address. The Ticket/Queuing/DRDPA/Adaptive locks fall into this category, and the newly
1097 // introduced "rtm" lock is also an indirect lock, implemented on top of the Queuing lock.
1098 // When the omp_lock_t object holds an index (not a lock address), 0 is written to the LSB to
1099 // differentiate the lock from a direct lock, and the remaining part is the actual index into the
1100 // indirect lock table.
1101 //
1102 
1103 #include <stdint.h> // for uintptr_t
1104 
1105 // Shortcuts
1106 #define KMP_USE_INLINED_TAS (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1
1107 #define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0
1108 
1109 // List of lock definitions; all nested locks are indirect locks.
1110 // The hle lock is an xchg lock prefixed with XACQUIRE/XRELEASE.
1111 // All nested locks are indirect lock types.
1112 #if KMP_USE_TSX
1113 # if KMP_USE_FUTEX
1114 # define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a)
1115 # define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
1116  m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
1117  m(nested_queuing, a) m(nested_drdpa, a)
1118 # else
1119 # define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a)
1120 # define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
1121  m(nested_tas, a) m(nested_ticket, a) \
1122  m(nested_queuing, a) m(nested_drdpa, a)
1123 # endif // KMP_USE_FUTEX
1124 # define KMP_LAST_D_LOCK lockseq_hle
1125 #else
1126 # if KMP_USE_FUTEX
1127 # define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
1128 # define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \
1129  m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
1130  m(nested_queuing, a) m(nested_drdpa, a)
1131 # define KMP_LAST_D_LOCK lockseq_futex
1132 # else
1133 # define KMP_FOREACH_D_LOCK(m, a) m(tas, a)
1134 # define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \
1135  m(nested_tas, a) m(nested_ticket, a) \
1136  m(nested_queuing, a) m(nested_drdpa, a)
1137 # define KMP_LAST_D_LOCK lockseq_tas
1138 # endif // KMP_USE_FUTEX
1139 #endif // KMP_USE_TSX
1140 
1141 // Information used in dynamic dispatch
1142 #define KMP_LOCK_SHIFT 8 // number of low bits to be used as tag for direct locks
1143 #define KMP_FIRST_D_LOCK lockseq_tas
1144 #define KMP_FIRST_I_LOCK lockseq_ticket
1145 #define KMP_LAST_I_LOCK lockseq_nested_drdpa
1146 #define KMP_NUM_I_LOCKS (locktag_nested_drdpa+1) // number of indirect lock types
1147 
1148 // Base type for dynamic locks.
1149 typedef kmp_uint32 kmp_dyna_lock_t;
1150 
1151 // Lock sequence that enumerates all lock kinds.
1152 // Always make this enumeration consistent with kmp_lockseq_t in the include directory.
1153 typedef enum {
1154  lockseq_indirect = 0,
1155 #define expand_seq(l,a) lockseq_##l,
1156  KMP_FOREACH_D_LOCK(expand_seq, 0)
1157  KMP_FOREACH_I_LOCK(expand_seq, 0)
1158 #undef expand_seq
1159 } kmp_dyna_lockseq_t;
1160 
1161 // Enumerates indirect lock tags.
1162 typedef enum {
1163 #define expand_tag(l,a) locktag_##l,
1164  KMP_FOREACH_I_LOCK(expand_tag, 0)
1165 #undef expand_tag
1166 } kmp_indirect_locktag_t;
1167 
1168 // Utility macros that extract information from lock sequences.
1169 #define KMP_IS_D_LOCK(seq) ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK)
1170 #define KMP_IS_I_LOCK(seq) ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK)
1171 #define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq) - KMP_FIRST_I_LOCK)
1172 #define KMP_GET_D_TAG(seq) ((seq)<<1 | 1)
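// For example, lockseq_tas == 1 (it immediately follows lockseq_indirect == 0 in
// kmp_dyna_lockseq_t above), so KMP_GET_D_TAG(lockseq_tas) == (1 << 1 | 1) == 3.
// Every direct lock tag is therefore odd, which is how the LSB distinguishes
// direct locks from indirect ones, as described above.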
1173 
1174 // Enumerates direct lock tags starting from indirect tag.
1175 typedef enum {
1176 #define expand_tag(l,a) locktag_##l = KMP_GET_D_TAG(lockseq_##l),
1177  KMP_FOREACH_D_LOCK(expand_tag, 0)
1178 #undef expand_tag
1179 } kmp_direct_locktag_t;
1180 
1181 // Indirect lock type
1182 typedef struct {
1183  kmp_user_lock_p lock;
1184  kmp_indirect_locktag_t type;
1185 } kmp_indirect_lock_t;
1186 
1187 // Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking.
1188 extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
1189 extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *);
1190 extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
1191 extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
1192 extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);
1193 
1194 // Function tables for indirect locks. Set/unset/test differentiate functions with/without consistency checking.
1195 extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
1196 extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p);
1197 extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
1198 extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
1199 extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);
1200 
1201 // Extracts direct lock tag from a user lock pointer
1202 #define KMP_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & ((1<<KMP_LOCK_SHIFT)-1) & -(*((kmp_dyna_lock_t *)(l)) & 1))
1203 
1204 // Extracts indirect lock index from a user lock pointer
1205 #define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)
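// In KMP_EXTRACT_D_TAG, the trailing `& -(*((kmp_dyna_lock_t *)(l)) & 1)' term
// is all ones when the LSB of the lock word is set (a direct lock) and zero
// otherwise, so the macro yields the low-byte tag for direct locks and 0 for
// indirect locks. KMP_EXTRACT_I_INDEX simply shifts the LSB away to recover
// the table index stored in the upper bits.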
1206 
1207 // Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type).
1208 #define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]
1209 
1210 // Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type).
1211 #define KMP_I_LOCK_FUNC(l, op) __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]
1212 
1213 // Initializes a direct lock with the given lock pointer and lock sequence.
1214 #define KMP_INIT_D_LOCK(l, seq) __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)
1215 
1216 // Initializes an indirect lock with the given lock pointer and lock sequence.
1217 #define KMP_INIT_I_LOCK(l, seq) __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)
1218 
1219 // Returns "free" lock value for the given lock type.
1220 #define KMP_LOCK_FREE(type) (locktag_##type)
1221 
1222 // Returns "busy" lock value for the given lock type.
1223 #define KMP_LOCK_BUSY(v, type) ((v)<<KMP_LOCK_SHIFT | locktag_##type)
1224 
1225 // Returns lock value after removing (shifting) lock tag.
1226 #define KMP_LOCK_STRIP(v) ((v)>>KMP_LOCK_SHIFT)
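// For example, with KMP_LOCK_SHIFT == 8, a direct TAS lock held by thread gtid
// would store KMP_LOCK_BUSY(gtid + 1, tas) == ((gtid + 1) << 8) | locktag_tas,
// and KMP_LOCK_STRIP() shifts the tag back out to recover gtid + 1.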
1227 
1228 // Initializes global states and data structures for managing dynamic user locks.
1229 extern void __kmp_init_dynamic_user_locks();
1230 
1231 // Allocates and returns an indirect lock with the given indirect lock tag.
1232 extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);
1233 
1234 // Cleans up global states and data structures for managing dynamic user locks.
1235 extern void __kmp_cleanup_indirect_user_locks();
1236 
1237 // Default user lock sequence when not using hinted locks.
1238 extern kmp_dyna_lockseq_t __kmp_user_lock_seq;
1239 
1240 // Jump table for "set lock location", available only for indirect locks.
1241 extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *);
1242 #define KMP_SET_I_LOCK_LOCATION(lck, loc) { \
1243  if (__kmp_indirect_set_location[(lck)->type] != NULL) \
1244  __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \
1245 }
1246 
1247 // Jump table for "set lock flags", available only for indirect locks.
1248 extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t);
1249 #define KMP_SET_I_LOCK_FLAGS(lck, flag) { \
1250  if (__kmp_indirect_set_flags[(lck)->type] != NULL) \
1251  __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \
1252 }
1253 
1254 // Jump table for "get lock location", available only for indirect locks.
1255 extern const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
1256 #define KMP_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL \
1257  ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
1258  : NULL )
1259 
1260 // Jump table for "get lock flags", available only for indirect locks.
1261 extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
1262 #define KMP_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL \
1263  ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
1264  : NULL )
1265 
1266 #define KMP_I_LOCK_CHUNK 1024 // number of kmp_indirect_lock_t objects to be allocated together
1267 
1268 // Lock table for indirect locks.
1269 typedef struct kmp_indirect_lock_table {
1270  kmp_indirect_lock_t **table; // blocks of indirect locks allocated
1271  kmp_lock_index_t size; // size of the indirect lock table
1272  kmp_lock_index_t next; // index to the next lock to be allocated
1273 } kmp_indirect_lock_table_t;
1274 
1275 extern kmp_indirect_lock_table_t __kmp_i_lock_table;
1276 
1277 // Returns the indirect lock associated with the given index.
1278 #define KMP_GET_I_LOCK(index) (*(__kmp_i_lock_table.table + (index)/KMP_I_LOCK_CHUNK) + (index)%KMP_I_LOCK_CHUNK)
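// For example, with KMP_I_LOCK_CHUNK == 1024, index 1500 resolves to entry
// 1500 % 1024 == 476 of block 1500 / 1024 == 1, i.e.
// *(__kmp_i_lock_table.table + 1) + 476.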
1279 
1280 // Number of locks in a lock block, which is fixed to "1" now.
1281 // TODO: No lock block implementation now. If we do support, we need to manage lock block data
1282 // TODO: No lock block implementation now. If we do add support, we need to manage a lock block data
1283 extern int __kmp_num_locks_in_block;
1284 
1285 // Fast lock table lookup without consistency checking
1286 #define KMP_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *)) \
1287  ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \
1288  : *((kmp_indirect_lock_t **)(l)) )
1289 
1290 // Used once in kmp_error.cpp
1291 extern kmp_int32
1292 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
1293 
1294 #else // KMP_USE_DYNAMIC_LOCK
1295 
1296 # define KMP_LOCK_BUSY(v, type) (v)
1297 # define KMP_LOCK_FREE(type) 0
1298 # define KMP_LOCK_STRIP(v) (v)
1299 
1300 #endif // KMP_USE_DYNAMIC_LOCK
1301 
1302 // data structure for using backoff within spin locks.
1303 typedef struct {
1304  kmp_uint32 step; // current step
1305  kmp_uint32 max_backoff; // upper bound of outer delay loop
1306  kmp_uint32 min_tick; // size of inner delay loop in ticks (machine-dependent)
1307 } kmp_backoff_t;
1308 
1309 // Runtime's default backoff parameters
1310 extern kmp_backoff_t __kmp_spin_backoff_params;
1311 
1312 // Backoff function
1313 extern void __kmp_spin_backoff(kmp_backoff_t *);
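// Illustrative sketch of how a spin loop can use the backoff helper (not taken
// from the runtime sources; `try_acquire' is a hypothetical placeholder for a
// lock-specific test-and-set attempt):
//
//   kmp_backoff_t boff = __kmp_spin_backoff_params;   // start from the defaults
//   while ( ! try_acquire( lck ) )
//       __kmp_spin_backoff( &boff );                  // delay, bounded by max_backoff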
1314 
1315 #ifdef __cplusplus
1316 } // extern "C"
1317 #endif // __cplusplus
1318 
1319 #endif /* KMP_LOCK_H */
1320 