29 #ifndef INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
30 #define INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
32 #include "mdds/global.hpp"
33 #include "../types.hpp"
35 namespace mdds {
namespace mtv {
namespace aos {
39 template<
typename Blks, lu_factor_t F>
42 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta)
const
44 static_assert(invalid_static_int<F>,
"The loop-unrolling factor must be one of 0, 4, 8, 16, or 32.");
48 template<
typename Blks>
51 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta)
const
53 int64_t n = blocks.size();
55 if (start_block_index >= n)
59 #pragma omp parallel for
61 for (int64_t i = start_block_index; i < n; ++i)
62 blocks[i].position += delta;
66 template<
typename Blks>
69 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta)
const
71 int64_t n = blocks.size();
73 if (start_block_index >= n)
77 int64_t len = n - start_block_index;
78 int64_t rem = len & 3;
80 len += start_block_index;
82 #pragma omp parallel for
84 for (int64_t i = start_block_index; i < len; i += 4)
86 blocks[i].position += delta;
87 blocks[i+1].position += delta;
88 blocks[i+2].position += delta;
89 blocks[i+3].position += delta;
93 for (int64_t i = len; i < rem; ++i)
94 blocks[i].position += delta;
98 template<
typename Blks>
101 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta)
const
103 int64_t n = blocks.size();
105 if (start_block_index >= n)
109 int64_t len = n - start_block_index;
110 int64_t rem = len & 7;
112 len += start_block_index;
114 #pragma omp parallel for
116 for (int64_t i = start_block_index; i < len; i += 8)
118 blocks[i].position += delta;
119 blocks[i+1].position += delta;
120 blocks[i+2].position += delta;
121 blocks[i+3].position += delta;
122 blocks[i+4].position += delta;
123 blocks[i+5].position += delta;
124 blocks[i+6].position += delta;
125 blocks[i+7].position += delta;
129 for (int64_t i = len; i < rem; ++i)
130 blocks[i].position += delta;
134 template<
typename Blks>
137 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta)
const
139 int64_t n = blocks.size();
141 if (start_block_index >= n)
145 int64_t len = n - start_block_index;
146 int64_t rem = len & 15;
148 len += start_block_index;
150 #pragma omp parallel for
152 for (int64_t i = start_block_index; i < len; i += 16)
154 blocks[i].position += delta;
155 blocks[i+1].position += delta;
156 blocks[i+2].position += delta;
157 blocks[i+3].position += delta;
158 blocks[i+4].position += delta;
159 blocks[i+5].position += delta;
160 blocks[i+6].position += delta;
161 blocks[i+7].position += delta;
162 blocks[i+8].position += delta;
163 blocks[i+9].position += delta;
164 blocks[i+10].position += delta;
165 blocks[i+11].position += delta;
166 blocks[i+12].position += delta;
167 blocks[i+13].position += delta;
168 blocks[i+14].position += delta;
169 blocks[i+15].position += delta;
173 for (int64_t i = len; i < rem; ++i)
174 blocks[i].position += delta;
178 template<
typename Blks>
181 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta)
const
183 int64_t n = blocks.size();
185 if (start_block_index >= n)
189 int64_t len = n - start_block_index;
190 int64_t rem = len & 31;
192 len += start_block_index;
194 #pragma omp parallel for
196 for (int64_t i = start_block_index; i < len; i += 32)
198 blocks[i].position += delta;
199 blocks[i+1].position += delta;
200 blocks[i+2].position += delta;
201 blocks[i+3].position += delta;
202 blocks[i+4].position += delta;
203 blocks[i+5].position += delta;
204 blocks[i+6].position += delta;
205 blocks[i+7].position += delta;
206 blocks[i+8].position += delta;
207 blocks[i+9].position += delta;
208 blocks[i+10].position += delta;
209 blocks[i+11].position += delta;
210 blocks[i+12].position += delta;
211 blocks[i+13].position += delta;
212 blocks[i+14].position += delta;
213 blocks[i+15].position += delta;
214 blocks[i+16].position += delta;
215 blocks[i+17].position += delta;
216 blocks[i+18].position += delta;
217 blocks[i+19].position += delta;
218 blocks[i+20].position += delta;
219 blocks[i+21].position += delta;
220 blocks[i+22].position += delta;
221 blocks[i+23].position += delta;
222 blocks[i+24].position += delta;
223 blocks[i+25].position += delta;
224 blocks[i+26].position += delta;
225 blocks[i+27].position += delta;
226 blocks[i+28].position += delta;
227 blocks[i+29].position += delta;
228 blocks[i+30].position += delta;
229 blocks[i+31].position += delta;
233 for (int64_t i = len; i < rem; ++i)
234 blocks[i].position += delta;
Definition: aos/block_util.hpp:41