mdds
aos/block_util.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
3  *
4  * Copyright (c) 2021 Kohei Yoshida
5  *
6  * Permission is hereby granted, free of charge, to any person
7  * obtaining a copy of this software and associated documentation
8  * files (the "Software"), to deal in the Software without
9  * restriction, including without limitation the rights to use,
10  * copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following
13  * conditions:
14  *
15  * The above copyright notice and this permission notice shall be
16  * included in all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25  * OTHER DEALINGS IN THE SOFTWARE.
26  *
27  ************************************************************************/
28 
29 #ifndef INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
30 #define INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
31 
32 #include "mdds/global.hpp"
33 #include "../types.hpp"
34 
35 namespace mdds { namespace mtv { namespace aos {
36 
37 namespace detail {
38 
39 template<typename Blks, lu_factor_t F>
41 {
42  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
43  {
44  static_assert(invalid_static_int<F>, "The loop-unrolling factor must be one of 0, 4, 8, 16, or 32.");
45  }
46 };
47 
48 template<typename Blks>
49 struct adjust_block_positions<Blks, lu_factor_t::none>
50 {
51  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
52  {
53  int64_t n = blocks.size();
54 
55  if (start_block_index >= n)
56  return;
57 
58 #if MDDS_USE_OPENMP
59  #pragma omp parallel for
60 #endif
61  for (int64_t i = start_block_index; i < n; ++i)
62  blocks[i].position += delta;
63  }
64 };
65 
66 template<typename Blks>
67 struct adjust_block_positions<Blks, lu_factor_t::lu4>
68 {
69  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
70  {
71  int64_t n = blocks.size();
72 
73  if (start_block_index >= n)
74  return;
75 
76  // Ensure that the section length is divisible by 4.
77  int64_t len = n - start_block_index;
78  int64_t rem = len & 3; // % 4
79  len -= rem;
80  len += start_block_index;
81 #if MDDS_USE_OPENMP
82  #pragma omp parallel for
83 #endif
84  for (int64_t i = start_block_index; i < len; i += 4)
85  {
86  blocks[i].position += delta;
87  blocks[i+1].position += delta;
88  blocks[i+2].position += delta;
89  blocks[i+3].position += delta;
90  }
91 
92  rem += len;
93  for (int64_t i = len; i < rem; ++i)
94  blocks[i].position += delta;
95  }
96 };
97 
98 template<typename Blks>
99 struct adjust_block_positions<Blks, lu_factor_t::lu8>
100 {
101  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
102  {
103  int64_t n = blocks.size();
104 
105  if (start_block_index >= n)
106  return;
107 
108  // Ensure that the section length is divisible by 8.
109  int64_t len = n - start_block_index;
110  int64_t rem = len & 7; // % 8
111  len -= rem;
112  len += start_block_index;
113 #if MDDS_USE_OPENMP
114  #pragma omp parallel for
115 #endif
116  for (int64_t i = start_block_index; i < len; i += 8)
117  {
118  blocks[i].position += delta;
119  blocks[i+1].position += delta;
120  blocks[i+2].position += delta;
121  blocks[i+3].position += delta;
122  blocks[i+4].position += delta;
123  blocks[i+5].position += delta;
124  blocks[i+6].position += delta;
125  blocks[i+7].position += delta;
126  }
127 
128  rem += len;
129  for (int64_t i = len; i < rem; ++i)
130  blocks[i].position += delta;
131  }
132 };
133 
134 template<typename Blks>
135 struct adjust_block_positions<Blks, lu_factor_t::lu16>
136 {
137  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
138  {
139  int64_t n = blocks.size();
140 
141  if (start_block_index >= n)
142  return;
143 
144  // Ensure that the section length is divisible by 16.
145  int64_t len = n - start_block_index;
146  int64_t rem = len & 15; // % 16
147  len -= rem;
148  len += start_block_index;
149 #if MDDS_USE_OPENMP
150  #pragma omp parallel for
151 #endif
152  for (int64_t i = start_block_index; i < len; i += 16)
153  {
154  blocks[i].position += delta;
155  blocks[i+1].position += delta;
156  blocks[i+2].position += delta;
157  blocks[i+3].position += delta;
158  blocks[i+4].position += delta;
159  blocks[i+5].position += delta;
160  blocks[i+6].position += delta;
161  blocks[i+7].position += delta;
162  blocks[i+8].position += delta;
163  blocks[i+9].position += delta;
164  blocks[i+10].position += delta;
165  blocks[i+11].position += delta;
166  blocks[i+12].position += delta;
167  blocks[i+13].position += delta;
168  blocks[i+14].position += delta;
169  blocks[i+15].position += delta;
170  }
171 
172  rem += len;
173  for (int64_t i = len; i < rem; ++i)
174  blocks[i].position += delta;
175  }
176 };
177 
178 template<typename Blks>
179 struct adjust_block_positions<Blks, lu_factor_t::lu32>
180 {
181  void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
182  {
183  int64_t n = blocks.size();
184 
185  if (start_block_index >= n)
186  return;
187 
188  // Ensure that the section length is divisible by 32.
189  int64_t len = n - start_block_index;
190  int64_t rem = len & 31; // % 32
191  len -= rem;
192  len += start_block_index;
193 #if MDDS_USE_OPENMP
194  #pragma omp parallel for
195 #endif
196  for (int64_t i = start_block_index; i < len; i += 32)
197  {
198  blocks[i].position += delta;
199  blocks[i+1].position += delta;
200  blocks[i+2].position += delta;
201  blocks[i+3].position += delta;
202  blocks[i+4].position += delta;
203  blocks[i+5].position += delta;
204  blocks[i+6].position += delta;
205  blocks[i+7].position += delta;
206  blocks[i+8].position += delta;
207  blocks[i+9].position += delta;
208  blocks[i+10].position += delta;
209  blocks[i+11].position += delta;
210  blocks[i+12].position += delta;
211  blocks[i+13].position += delta;
212  blocks[i+14].position += delta;
213  blocks[i+15].position += delta;
214  blocks[i+16].position += delta;
215  blocks[i+17].position += delta;
216  blocks[i+18].position += delta;
217  blocks[i+19].position += delta;
218  blocks[i+20].position += delta;
219  blocks[i+21].position += delta;
220  blocks[i+22].position += delta;
221  blocks[i+23].position += delta;
222  blocks[i+24].position += delta;
223  blocks[i+25].position += delta;
224  blocks[i+26].position += delta;
225  blocks[i+27].position += delta;
226  blocks[i+28].position += delta;
227  blocks[i+29].position += delta;
228  blocks[i+30].position += delta;
229  blocks[i+31].position += delta;
230  }
231 
232  rem += len;
233  for (int64_t i = len; i < rem; ++i)
234  blocks[i].position += delta;
235  }
236 };
237 
238 } // namespace detail
239 
240 }}}
241 
242 #endif
243 
244 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
245 
Definition: aos/block_util.hpp:41