LLVM OpenMP* Runtime Library
kmp_affinity.h
1 /*
2  * kmp_affinity.h -- header for affinity management
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef KMP_AFFINITY_H
16 #define KMP_AFFINITY_H
17 
18 #include "kmp_os.h"
19 #include "kmp.h"
20 
21 #if KMP_AFFINITY_SUPPORTED
22 #if KMP_USE_HWLOC
23 class KMPHwlocAffinity: public KMPAffinity {
24 public:
25  class Mask : public KMPAffinity::Mask {
26  hwloc_cpuset_t mask;
27  public:
28  Mask() { mask = hwloc_bitmap_alloc(); this->zero(); }
29  ~Mask() { hwloc_bitmap_free(mask); }
30  void set(int i) override { hwloc_bitmap_set(mask, i); }
31  bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
32  void clear(int i) override { hwloc_bitmap_clr(mask, i); }
33  void zero() override { hwloc_bitmap_zero(mask); }
34  void copy(const KMPAffinity::Mask* src) override {
35  const Mask* convert = static_cast<const Mask*>(src);
36  hwloc_bitmap_copy(mask, convert->mask);
37  }
38  void bitwise_and(const KMPAffinity::Mask* rhs) override {
39  const Mask* convert = static_cast<const Mask*>(rhs);
40  hwloc_bitmap_and(mask, mask, convert->mask);
41  }
42  void bitwise_or(const KMPAffinity::Mask * rhs) override {
43  const Mask* convert = static_cast<const Mask*>(rhs);
44  hwloc_bitmap_or(mask, mask, convert->mask);
45  }
46  void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
47  int begin() const override { return hwloc_bitmap_first(mask); }
48  int end() const override { return -1; }
49  int next(int previous) const override { return hwloc_bitmap_next(mask, previous); }
50  int get_system_affinity(bool abort_on_error) override {
51  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
52  "Illegal get affinity operation when not capable");
53  int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
54  if (retval >= 0) {
55  return 0;
56  }
57  int error = errno;
58  if (abort_on_error) {
59  __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
60  }
61  return error;
62  }
63  int set_system_affinity(bool abort_on_error) const override {
64  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
65  "Illegal get affinity operation when not capable");
66  int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
67  if (retval >= 0) {
68  return 0;
69  }
70  int error = errno;
71  if (abort_on_error) {
72  __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
73  }
74  return error;
75  }
76  int get_proc_group() const override {
77  int i;
78  int group = -1;
79 # if KMP_OS_WINDOWS
80  if (__kmp_num_proc_groups == 1) {
81  return 1;
82  }
83  for (i = 0; i < __kmp_num_proc_groups; i++) {
84  // On windows, the long type is always 32 bits
85  unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2);
86  unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1);
87  if (first_32_bits == 0 && second_32_bits == 0) {
88  continue;
89  }
90  if (group >= 0) {
91  return -1;
92  }
93  group = i;
94  }
95 # endif /* KMP_OS_WINDOWS */
96  return group;
97  }
98  };
99  void determine_capable(const char* var) override {
100  const hwloc_topology_support* topology_support;
101  if(__kmp_hwloc_topology == NULL) {
102  if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
103  __kmp_hwloc_error = TRUE;
104  if(__kmp_affinity_verbose)
105  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
106  }
107  if(hwloc_topology_load(__kmp_hwloc_topology) < 0) {
108  __kmp_hwloc_error = TRUE;
109  if(__kmp_affinity_verbose)
110  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
111  }
112  }
113  topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
114  // Is the system capable of setting/getting this thread's affinity?
115  // also, is topology discovery possible? (pu indicates ability to discover processing units)
116  // and finally, were there no errors when calling any hwloc_* API functions?
117  if(topology_support && topology_support->cpubind->set_thisthread_cpubind &&
118  topology_support->cpubind->get_thisthread_cpubind &&
119  topology_support->discovery->pu &&
120  !__kmp_hwloc_error)
121  {
122  // enables affinity according to KMP_AFFINITY_CAPABLE() macro
123  KMP_AFFINITY_ENABLE(TRUE);
124  } else {
125  // indicate that hwloc didn't work and disable affinity
126  __kmp_hwloc_error = TRUE;
127  KMP_AFFINITY_DISABLE();
128  }
129  }
130  void bind_thread(int which) override {
131  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
132  "Illegal set affinity operation when not capable");
133  KMPAffinity::Mask *mask;
134  KMP_CPU_ALLOC_ON_STACK(mask);
135  KMP_CPU_ZERO(mask);
136  KMP_CPU_SET(which, mask);
137  __kmp_set_system_affinity(mask, TRUE);
138  KMP_CPU_FREE_FROM_STACK(mask);
139  }
140  KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
141  void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
142  KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
143  void deallocate_mask_array(KMPAffinity::Mask* array) override {
144  Mask* hwloc_array = static_cast<Mask*>(array);
145  delete[] hwloc_array;
146  }
147  KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
148  Mask* hwloc_array = static_cast<Mask*>(array);
149  return &(hwloc_array[index]);
150  }
151  api_type get_api_type() const override { return HWLOC; }
152 };
153 #endif /* KMP_USE_HWLOC */
154 
155 #if KMP_OS_LINUX
156 /*
157  * On some of the older OS's that we build on, these constants aren't present
158  * in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on
159  * all systems of the same arch where they are defined, and they cannot change.
160  * stone forever.
161  */
162 #include <sys/syscall.h>
163 # if KMP_ARCH_X86 || KMP_ARCH_ARM
164 # ifndef __NR_sched_setaffinity
165 # define __NR_sched_setaffinity 241
166 # elif __NR_sched_setaffinity != 241
167 # error Wrong code for setaffinity system call.
168 # endif /* __NR_sched_setaffinity */
169 # ifndef __NR_sched_getaffinity
170 # define __NR_sched_getaffinity 242
171 # elif __NR_sched_getaffinity != 242
172 # error Wrong code for getaffinity system call.
173 # endif /* __NR_sched_getaffinity */
174 # elif KMP_ARCH_AARCH64
175 # ifndef __NR_sched_setaffinity
176 # define __NR_sched_setaffinity 122
177 # elif __NR_sched_setaffinity != 122
178 # error Wrong code for setaffinity system call.
179 # endif /* __NR_sched_setaffinity */
180 # ifndef __NR_sched_getaffinity
181 # define __NR_sched_getaffinity 123
182 # elif __NR_sched_getaffinity != 123
183 # error Wrong code for getaffinity system call.
184 # endif /* __NR_sched_getaffinity */
185 # elif KMP_ARCH_X86_64
186 # ifndef __NR_sched_setaffinity
187 # define __NR_sched_setaffinity 203
188 # elif __NR_sched_setaffinity != 203
189 # error Wrong code for setaffinity system call.
190 # endif /* __NR_sched_setaffinity */
191 # ifndef __NR_sched_getaffinity
192 # define __NR_sched_getaffinity 204
193 # elif __NR_sched_getaffinity != 204
194 # error Wrong code for getaffinity system call.
195 # endif /* __NR_sched_getaffinity */
196 # elif KMP_ARCH_PPC64
197 # ifndef __NR_sched_setaffinity
198 # define __NR_sched_setaffinity 222
199 # elif __NR_sched_setaffinity != 222
200 # error Wrong code for setaffinity system call.
201 # endif /* __NR_sched_setaffinity */
202 # ifndef __NR_sched_getaffinity
203 # define __NR_sched_getaffinity 223
204 # elif __NR_sched_getaffinity != 223
205 # error Wrong code for getaffinity system call.
206 # endif /* __NR_sched_getaffinity */
207 # elif KMP_ARCH_MIPS
208 # ifndef __NR_sched_setaffinity
209 # define __NR_sched_setaffinity 4239
210 # elif __NR_sched_setaffinity != 4239
211 # error Wrong code for setaffinity system call.
212 # endif /* __NR_sched_setaffinity */
213 # ifndef __NR_sched_getaffinity
214 # define __NR_sched_getaffinity 4240
215 # elif __NR_sched_getaffinity != 4240
216 # error Wrong code for getaffinity system call.
217 # endif /* __NR_sched_getaffinity */
218 # elif KMP_ARCH_MIPS64
219 # ifndef __NR_sched_setaffinity
220 # define __NR_sched_setaffinity 5195
221 # elif __NR_sched_setaffinity != 5195
222 # error Wrong code for setaffinity system call.
223 # endif /* __NR_sched_setaffinity */
224 # ifndef __NR_sched_getaffinity
225 # define __NR_sched_getaffinity 5196
226 # elif __NR_sched_getaffinity != 5196
227 # error Wrong code for getaffinity system call.
228 # endif /* __NR_sched_getaffinity */
229 # else
230 # error Unknown or unsupported architecture
231 # endif /* KMP_ARCH_* */
232 class KMPNativeAffinity : public KMPAffinity {
233  class Mask : public KMPAffinity::Mask {
234  typedef unsigned char mask_t;
235  static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
236  public:
237  mask_t* mask;
238  Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); }
239  ~Mask() { if (mask) __kmp_free(mask); }
240  void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
241  bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
242  void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
243  void zero() override {
244  for (size_t i=0; i<__kmp_affin_mask_size; ++i)
245  mask[i] = 0;
246  }
247  void copy(const KMPAffinity::Mask* src) override {
248  const Mask * convert = static_cast<const Mask*>(src);
249  for (size_t i=0; i<__kmp_affin_mask_size; ++i)
250  mask[i] = convert->mask[i];
251  }
252  void bitwise_and(const KMPAffinity::Mask* rhs) override {
253  const Mask * convert = static_cast<const Mask*>(rhs);
254  for (size_t i=0; i<__kmp_affin_mask_size; ++i)
255  mask[i] &= convert->mask[i];
256  }
257  void bitwise_or(const KMPAffinity::Mask* rhs) override {
258  const Mask * convert = static_cast<const Mask*>(rhs);
259  for (size_t i=0; i<__kmp_affin_mask_size; ++i)
260  mask[i] |= convert->mask[i];
261  }
262  void bitwise_not() override {
263  for (size_t i=0; i<__kmp_affin_mask_size; ++i)
264  mask[i] = ~(mask[i]);
265  }
266  int begin() const override {
267  int retval = 0;
268  while (retval < end() && !is_set(retval))
269  ++retval;
270  return retval;
271  }
272  int end() const override { return __kmp_affin_mask_size*BITS_PER_MASK_T; }
273  int next(int previous) const override {
274  int retval = previous+1;
275  while (retval < end() && !is_set(retval))
276  ++retval;
277  return retval;
278  }
279  int get_system_affinity(bool abort_on_error) override {
280  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
281  "Illegal get affinity operation when not capable");
282  int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask );
283  if (retval >= 0) {
284  return 0;
285  }
286  int error = errno;
287  if (abort_on_error) {
288  __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
289  }
290  return error;
291  }
292  int set_system_affinity(bool abort_on_error) const override {
293  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
294  "Illegal get affinity operation when not capable");
295  int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask );
296  if (retval >= 0) {
297  return 0;
298  }
299  int error = errno;
300  if (abort_on_error) {
301  __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
302  }
303  return error;
304  }
305  };
306  void determine_capable(const char* env_var) override {
307  __kmp_affinity_determine_capable(env_var);
308  }
309  void bind_thread(int which) override {
310  __kmp_affinity_bind_thread(which);
311  }
312  KMPAffinity::Mask* allocate_mask() override {
313  KMPNativeAffinity::Mask* retval = new Mask();
314  return retval;
315  }
316  void deallocate_mask(KMPAffinity::Mask* m) override {
317  KMPNativeAffinity::Mask* native_mask = static_cast<KMPNativeAffinity::Mask*>(m);
318  delete m;
319  }
320  KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
321  void deallocate_mask_array(KMPAffinity::Mask* array) override {
322  Mask* linux_array = static_cast<Mask*>(array);
323  delete[] linux_array;
324  }
325  KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
326  Mask* linux_array = static_cast<Mask*>(array);
327  return &(linux_array[index]);
328  }
329  api_type get_api_type() const override { return NATIVE_OS; }
330 };
331 #endif /* KMP_OS_LINUX */
332 
333 #if KMP_OS_WINDOWS
334 class KMPNativeAffinity : public KMPAffinity {
335  class Mask : public KMPAffinity::Mask {
336  typedef ULONG_PTR mask_t;
337  static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
338  mask_t* mask;
339  public:
340  Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t)*__kmp_num_proc_groups); }
341  ~Mask() { if (mask) __kmp_free(mask); }
342  void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
343  bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
344  void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
345  void zero() override {
346  for (size_t i=0; i<__kmp_num_proc_groups; ++i)
347  mask[i] = 0;
348  }
349  void copy(const KMPAffinity::Mask* src) override {
350  const Mask * convert = static_cast<const Mask*>(src);
351  for (size_t i=0; i<__kmp_num_proc_groups; ++i)
352  mask[i] = convert->mask[i];
353  }
354  void bitwise_and(const KMPAffinity::Mask* rhs) override {
355  const Mask * convert = static_cast<const Mask*>(rhs);
356  for (size_t i=0; i<__kmp_num_proc_groups; ++i)
357  mask[i] &= convert->mask[i];
358  }
359  void bitwise_or(const KMPAffinity::Mask* rhs) override {
360  const Mask * convert = static_cast<const Mask*>(rhs);
361  for (size_t i=0; i<__kmp_num_proc_groups; ++i)
362  mask[i] |= convert->mask[i];
363  }
364  void bitwise_not() override {
365  for (size_t i=0; i<__kmp_num_proc_groups; ++i)
366  mask[i] = ~(mask[i]);
367  }
368  int begin() const override {
369  int retval = 0;
370  while (retval < end() && !is_set(retval))
371  ++retval;
372  return retval;
373  }
374  int end() const override { return __kmp_num_proc_groups*BITS_PER_MASK_T; }
375  int next(int previous) const override {
376  int retval = previous+1;
377  while (retval < end() && !is_set(retval))
378  ++retval;
379  return retval;
380  }
381  int set_system_affinity(bool abort_on_error) const override {
382  if (__kmp_num_proc_groups > 1) {
383  // Check for a valid mask.
384  GROUP_AFFINITY ga;
385  int group = get_proc_group();
386  if (group < 0) {
387  if (abort_on_error) {
388  KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
389  }
390  return -1;
391  }
392  // Transform the bit vector into a GROUP_AFFINITY struct
393  // and make the system call to set affinity.
394  ga.Group = group;
395  ga.Mask = mask[group];
396  ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
397 
398  KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
399  if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
400  DWORD error = GetLastError();
401  if (abort_on_error) {
402  __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ),
403  KMP_ERR( error ), __kmp_msg_null);
404  }
405  return error;
406  }
407  } else {
408  if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
409  DWORD error = GetLastError();
410  if (abort_on_error) {
411  __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ),
412  KMP_ERR( error ), __kmp_msg_null);
413  }
414  return error;
415  }
416  }
417  return 0;
418  }
419  int get_system_affinity(bool abort_on_error) override {
420  if (__kmp_num_proc_groups > 1) {
421  this->zero();
422  GROUP_AFFINITY ga;
423  KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
424  if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
425  DWORD error = GetLastError();
426  if (abort_on_error) {
427  __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
428  KMP_ERR(error), __kmp_msg_null);
429  }
430  return error;
431  }
432  if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || (ga.Mask == 0)) {
433  return -1;
434  }
435  mask[ga.Group] = ga.Mask;
436  } else {
437  mask_t newMask, sysMask, retval;
438  if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
439  DWORD error = GetLastError();
440  if (abort_on_error) {
441  __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
442  KMP_ERR(error), __kmp_msg_null);
443  }
444  return error;
445  }
446  retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
447  if (! retval) {
448  DWORD error = GetLastError();
449  if (abort_on_error) {
450  __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
451  KMP_ERR(error), __kmp_msg_null);
452  }
453  return error;
454  }
455  newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
456  if (! newMask) {
457  DWORD error = GetLastError();
458  if (abort_on_error) {
459  __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
460  KMP_ERR(error), __kmp_msg_null);
461  }
462  }
463  *mask = retval;
464  }
465  return 0;
466  }
467  int get_proc_group() const override {
468  int group = -1;
469  if (__kmp_num_proc_groups == 1) {
470  return 1;
471  }
472  for (int i = 0; i < __kmp_num_proc_groups; i++) {
473  if (mask[i] == 0)
474  continue;
475  if (group >= 0)
476  return -1;
477  group = i;
478  }
479  return group;
480  }
481  };
482  void determine_capable(const char* env_var) override {
483  __kmp_affinity_determine_capable(env_var);
484  }
485  void bind_thread(int which) override {
486  __kmp_affinity_bind_thread(which);
487  }
488  KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
489  void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
490  KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
491  void deallocate_mask_array(KMPAffinity::Mask* array) override {
492  Mask* windows_array = static_cast<Mask*>(array);
493  delete[] windows_array;
494  }
495  KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
496  Mask* windows_array = static_cast<Mask*>(array);
497  return &(windows_array[index]);
498  }
499  api_type get_api_type() const override { return NATIVE_OS; }
500 };
501 #endif /* KMP_OS_WINDOWS */
502 #endif /* KMP_AFFINITY_SUPPORTED */
503 
504 class Address {
505 public:
506  static const unsigned maxDepth = 32;
507  unsigned labels[maxDepth];
508  unsigned childNums[maxDepth];
509  unsigned depth;
510  unsigned leader;
511  Address(unsigned _depth)
512  : depth(_depth), leader(FALSE) {
513  }
514  Address &operator=(const Address &b) {
515  depth = b.depth;
516  for (unsigned i = 0; i < depth; i++) {
517  labels[i] = b.labels[i];
518  childNums[i] = b.childNums[i];
519  }
520  leader = FALSE;
521  return *this;
522  }
523  bool operator==(const Address &b) const {
524  if (depth != b.depth)
525  return false;
526  for (unsigned i = 0; i < depth; i++)
527  if(labels[i] != b.labels[i])
528  return false;
529  return true;
530  }
531  bool isClose(const Address &b, int level) const {
532  if (depth != b.depth)
533  return false;
534  if ((unsigned)level >= depth)
535  return true;
536  for (unsigned i = 0; i < (depth - level); i++)
537  if(labels[i] != b.labels[i])
538  return false;
539  return true;
540  }
541  bool operator!=(const Address &b) const {
542  return !operator==(b);
543  }
544  void print() const {
545  unsigned i;
546  printf("Depth: %u --- ", depth);
547  for(i=0;i<depth;i++) {
548  printf("%u ", labels[i]);
549  }
550  }
551 };
552 
553 class AddrUnsPair {
554 public:
555  Address first;
556  unsigned second;
557  AddrUnsPair(Address _first, unsigned _second)
558  : first(_first), second(_second) {
559  }
560  AddrUnsPair &operator=(const AddrUnsPair &b)
561  {
562  first = b.first;
563  second = b.second;
564  return *this;
565  }
566  void print() const {
567  printf("first = "); first.print();
568  printf(" --- second = %u", second);
569  }
570  bool operator==(const AddrUnsPair &b) const {
571  if(first != b.first) return false;
572  if(second != b.second) return false;
573  return true;
574  }
575  bool operator!=(const AddrUnsPair &b) const {
576  return !operator==(b);
577  }
578 };
579 
580 
581 static int
582 __kmp_affinity_cmp_Address_labels(const void *a, const void *b)
583 {
584  const Address *aa = (const Address *)&(((AddrUnsPair *)a)
585  ->first);
586  const Address *bb = (const Address *)&(((AddrUnsPair *)b)
587  ->first);
588  unsigned depth = aa->depth;
589  unsigned i;
590  KMP_DEBUG_ASSERT(depth == bb->depth);
591  for (i = 0; i < depth; i++) {
592  if (aa->labels[i] < bb->labels[i]) return -1;
593  if (aa->labels[i] > bb->labels[i]) return 1;
594  }
595  return 0;
596 }
597 
598 
605 public:
608  static const kmp_uint32 maxLeaves=4;
609  static const kmp_uint32 minBranch=4;
614  kmp_uint32 maxLevels;
615 
619  kmp_uint32 depth;
620  kmp_uint32 base_num_threads;
621  enum init_status { initialized=0, not_initialized=1, initializing=2 };
622  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
623  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
624 
628  kmp_uint32 *numPerLevel;
629  kmp_uint32 *skipPerLevel;
630 
631  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
632  int hier_depth = adr2os[0].first.depth;
633  int level = 0;
634  for (int i=hier_depth-1; i>=0; --i) {
635  int max = -1;
636  for (int j=0; j<num_addrs; ++j) {
637  int next = adr2os[j].first.childNums[i];
638  if (next > max) max = next;
639  }
640  numPerLevel[level] = max+1;
641  ++level;
642  }
643  }
644 
645  hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
646 
647  void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
648 
649  void init(AddrUnsPair *adr2os, int num_addrs)
650  {
651  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
652  if (bool_result == 0) { // Wait for initialization
653  while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
654  return;
655  }
656  KMP_DEBUG_ASSERT(bool_result==1);
657 
658  /* Added explicit initialization of the data fields here to prevent usage of dirty value
659  observed when static library is re-initialized multiple times (e.g. when
660  non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
661  depth = 1;
662  resizing = 0;
663  maxLevels = 7;
664  numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
665  skipPerLevel = &(numPerLevel[maxLevels]);
666  for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
667  numPerLevel[i] = 1;
668  skipPerLevel[i] = 1;
669  }
670 
671  // Sort table by physical ID
672  if (adr2os) {
673  qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
674  deriveLevels(adr2os, num_addrs);
675  }
676  else {
677  numPerLevel[0] = maxLeaves;
678  numPerLevel[1] = num_addrs/maxLeaves;
679  if (num_addrs%maxLeaves) numPerLevel[1]++;
680  }
681 
682  base_num_threads = num_addrs;
683  for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
684  if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
685  depth++;
686 
687  kmp_uint32 branch = minBranch;
688  if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
689  if (branch<minBranch) branch=minBranch;
690  for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
691  while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
692  if (numPerLevel[d] & 1) numPerLevel[d]++;
693  numPerLevel[d] = numPerLevel[d] >> 1;
694  if (numPerLevel[d+1] == 1) depth++;
695  numPerLevel[d+1] = numPerLevel[d+1] << 1;
696  }
697  if(numPerLevel[0] == 1) {
698  branch = branch >> 1;
699  if (branch<4) branch = minBranch;
700  }
701  }
702 
703  for (kmp_uint32 i=1; i<depth; ++i)
704  skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
705  // Fill in hierarchy in the case of oversubscription
706  for (kmp_uint32 i=depth; i<maxLevels; ++i)
707  skipPerLevel[i] = 2*skipPerLevel[i-1];
708 
709  uninitialized = initialized; // One writer
710 
711  }
712 
713  // Resize the hierarchy if nproc changes to something larger than before
714  void resize(kmp_uint32 nproc)
715  {
716  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
717  while (bool_result == 0) { // someone else is trying to resize
718  KMP_CPU_PAUSE();
719  if (nproc <= base_num_threads) // happy with other thread's resize
720  return;
721  else // try to resize
722  bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
723  }
724  KMP_DEBUG_ASSERT(bool_result!=0);
725  if (nproc <= base_num_threads) return; // happy with other thread's resize
726 
727  // Calculate new maxLevels
728  kmp_uint32 old_sz = skipPerLevel[depth-1];
729  kmp_uint32 incs = 0, old_maxLevels = maxLevels;
730  // First see if old maxLevels is enough to contain new size
731  for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
732  skipPerLevel[i] = 2*skipPerLevel[i-1];
733  numPerLevel[i-1] *= 2;
734  old_sz *= 2;
735  depth++;
736  }
737  if (nproc > old_sz) { // Not enough space, need to expand hierarchy
738  while (nproc > old_sz) {
739  old_sz *=2;
740  incs++;
741  depth++;
742  }
743  maxLevels += incs;
744 
745  // Resize arrays
746  kmp_uint32 *old_numPerLevel = numPerLevel;
747  kmp_uint32 *old_skipPerLevel = skipPerLevel;
748  numPerLevel = skipPerLevel = NULL;
749  numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
750  skipPerLevel = &(numPerLevel[maxLevels]);
751 
752  // Copy old elements from old arrays
753  for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
754  numPerLevel[i] = old_numPerLevel[i];
755  skipPerLevel[i] = old_skipPerLevel[i];
756  }
757 
758  // Init new elements in arrays to 1
759  for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
760  numPerLevel[i] = 1;
761  skipPerLevel[i] = 1;
762  }
763 
764  // Free old arrays
765  __kmp_free(old_numPerLevel);
766  }
767 
768  // Fill in oversubscription levels of hierarchy
769  for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
770  skipPerLevel[i] = 2*skipPerLevel[i-1];
771 
772  base_num_threads = nproc;
773  resizing = 0; // One writer
774 
775  }
776 };
777 #endif // KMP_AFFINITY_H
kmp_uint32 depth
Definition: kmp_affinity.h:619
kmp_uint32 * numPerLevel
Definition: kmp_affinity.h:628
kmp_uint32 maxLevels
Definition: kmp_affinity.h:614