#ifndef INCLUDED_volk_32i_x2_or_32i_a_H
#define INCLUDED_volk_32i_x2_or_32i_a_H

#include <inttypes.h>

/*
 * Element-wise bitwise OR of two int32_t vectors:
 *
 *     cVector[i] = aVector[i] | bVector[i]    for 0 <= i < num_points
 *
 * This section holds the kernels that use aligned SIMD loads/stores (plus the
 * NEON, generic and ORC variants). Every kernel processes as many full SIMD
 * vectors as fit in num_points and finishes the remainder with scalar code.
 *
 * NOTE(review): the LV_HAVE_AVX2, LV_HAVE_SSE, LV_HAVE_NEON and LV_HAVE_ORC
 * guard lines were not present in the listing this file was restored from;
 * their names follow the LV_HAVE_* pattern of the AVX512F and GENERIC guards
 * and should be confirmed against the build configuration.
 */

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief Bitwise OR of two int32_t vectors using AVX-512F aligned loads/stores.
 *
 * \param cVector    Output vector. Accessed with _mm512_store_si512, which
 *                   requires a 64-byte aligned address.
 * \param aVector    First input vector (64-byte aligned, see above).
 * \param bVector    Second input vector (64-byte aligned, see above).
 * \param num_points Number of int32_t elements in each vector.
 */
static inline void
volk_32i_x2_or_32i_a_avx512f(int32_t* cVector, const int32_t* aVector,
                             const int32_t* bVector, unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  int32_t* cPtr = cVector;
  const int32_t* aPtr = aVector;
  const int32_t* bPtr = bVector;

  __m512i aVal, bVal, cVal;
  for(; number < sixteenthPoints; number++){
    aVal = _mm512_load_si512(aPtr);
    bVal = _mm512_load_si512(bPtr);

    cVal = _mm512_or_si512(aVal, bVal);

    _mm512_store_si512(cPtr, cVal);

    /* Step to the next group of 16 lanes. */
    aPtr += 16;
    bPtr += 16;
    cPtr += 16;
  }

  /* Scalar tail: the num_points % 16 elements that did not fill a vector. */
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    cVector[number] = aVector[number] | bVector[number];
  }
}
#endif /* LV_HAVE_AVX512F */


#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief Bitwise OR of two int32_t vectors using AVX2 aligned loads/stores.
 *
 * \param cVector    Output vector. Accessed with _mm256_store_si256, which
 *                   requires a 32-byte aligned address.
 * \param aVector    First input vector (32-byte aligned, see above).
 * \param bVector    Second input vector (32-byte aligned, see above).
 * \param num_points Number of int32_t elements in each vector.
 */
static inline void
volk_32i_x2_or_32i_a_avx2(int32_t* cVector, const int32_t* aVector,
                          const int32_t* bVector, unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int oneEightPoints = num_points / 8;

  int32_t* cPtr = cVector;
  const int32_t* aPtr = aVector;
  const int32_t* bPtr = bVector;

  __m256i aVal, bVal, cVal;
  for(; number < oneEightPoints; number++){
    aVal = _mm256_load_si256((const __m256i*)aPtr);
    bVal = _mm256_load_si256((const __m256i*)bPtr);

    cVal = _mm256_or_si256(aVal, bVal);

    _mm256_store_si256((__m256i*)cPtr, cVal);

    /* Step to the next group of 8 lanes. */
    aPtr += 8;
    bPtr += 8;
    cPtr += 8;
  }

  /* Scalar tail: the num_points % 8 elements that did not fill a vector. */
  number = oneEightPoints * 8;
  for(; number < num_points; number++){
    cVector[number] = aVector[number] | bVector[number];
  }
}
#endif /* LV_HAVE_AVX2 */


#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
 * \brief Bitwise OR of two int32_t vectors using SSE aligned loads/stores.
 *
 * The integer data is moved through the float-typed SSE intrinsics
 * (_mm_load_ps / _mm_or_ps / _mm_store_ps); only the bit patterns are
 * combined, no floating-point arithmetic is performed on them.
 *
 * \param cVector    Output vector. Accessed with _mm_store_ps, which requires
 *                   a 16-byte aligned address.
 * \param aVector    First input vector (16-byte aligned, see above).
 * \param bVector    Second input vector (16-byte aligned, see above).
 * \param num_points Number of int32_t elements in each vector.
 */
static inline void
volk_32i_x2_or_32i_a_sse(int32_t* cVector, const int32_t* aVector,
                         const int32_t* bVector, unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  float* cPtr = (float*)cVector;
  const float* aPtr = (const float*)aVector;
  const float* bPtr = (const float*)bVector;

  __m128 aVal, bVal, cVal;
  for(; number < quarterPoints; number++){
    aVal = _mm_load_ps(aPtr);
    bVal = _mm_load_ps(bPtr);

    cVal = _mm_or_ps(aVal, bVal);

    _mm_store_ps(cPtr, cVal);

    /* Step to the next group of 4 lanes. */
    aPtr += 4;
    bPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail: the num_points % 4 elements that did not fill a vector. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    cVector[number] = aVector[number] | bVector[number];
  }
}
#endif /* LV_HAVE_SSE */


#ifdef LV_HAVE_NEON
#include <arm_neon.h>

/*!
 * \brief Bitwise OR of two int32_t vectors using NEON (4 lanes per step).
 *
 * \param cVector    Output vector.
 * \param aVector    First input vector.
 * \param bVector    Second input vector.
 * \param num_points Number of int32_t elements in each vector.
 */
static inline void
volk_32i_x2_or_32i_neon(int32_t* cVector, const int32_t* aVector,
                        const int32_t* bVector, unsigned int num_points)
{
  int32_t* cPtr = cVector;
  const int32_t* aPtr = aVector;
  const int32_t* bPtr = bVector;
  unsigned int number = 0;
  unsigned int quarter_points = num_points / 4;

  int32x4_t a_val, b_val, c_val;

  for(number = 0; number < quarter_points; number++){
    a_val = vld1q_s32(aPtr);
    b_val = vld1q_s32(bPtr);
    c_val = vorrq_s32(a_val, b_val);
    vst1q_s32(cPtr, c_val);

    /* Step to the next group of 4 lanes; the scalar tail below continues
     * from wherever these pointers end up. */
    aPtr += 4;
    bPtr += 4;
    cPtr += 4;
  }

  for(number = quarter_points * 4; number < num_points; number++){
    *cPtr++ = (*aPtr++) | (*bPtr++);
  }
}
#endif /* LV_HAVE_NEON */


#ifdef LV_HAVE_GENERIC

/*!
 * \brief Portable scalar bitwise OR of two int32_t vectors.
 *
 * \param cVector    Output vector.
 * \param aVector    First input vector.
 * \param bVector    Second input vector.
 * \param num_points Number of int32_t elements in each vector.
 */
static inline void
volk_32i_x2_or_32i_generic(int32_t* cVector, const int32_t* aVector,
                           const int32_t* bVector, unsigned int num_points)
{
  int32_t* cPtr = cVector;
  const int32_t* aPtr = aVector;
  const int32_t* bPtr = bVector;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    *cPtr++ = (*aPtr++) | (*bPtr++);
  }
}
#endif /* LV_HAVE_GENERIC */


#ifdef LV_HAVE_ORC

/* Implemented outside this header; only declared here. */
extern void
volk_32i_x2_or_32i_a_orc_impl(int32_t* cVector, const int32_t* aVector,
                              const int32_t* bVector, unsigned int num_points);

/*!
 * \brief Bitwise OR of two int32_t vectors, forwarded unchanged to
 *        volk_32i_x2_or_32i_a_orc_impl.
 *
 * \param cVector    Output vector.
 * \param aVector    First input vector.
 * \param bVector    Second input vector.
 * \param num_points Number of int32_t elements in each vector.
 */
static inline void
volk_32i_x2_or_32i_u_orc(int32_t* cVector, const int32_t* aVector,
                         const int32_t* bVector, unsigned int num_points)
{
  volk_32i_x2_or_32i_a_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */


#endif /* INCLUDED_volk_32i_x2_or_32i_a_H */
#ifndef INCLUDED_volk_32i_x2_or_32i_u_H
#define INCLUDED_volk_32i_x2_or_32i_u_H

#include <inttypes.h>

/*
 * Element-wise bitwise OR of two int32_t vectors:
 *
 *     cVector[i] = aVector[i] | bVector[i]    for 0 <= i < num_points
 *
 * This section holds the kernels built on unaligned SIMD loads/stores. Each
 * kernel processes as many full SIMD vectors as fit in num_points and
 * finishes the remainder with scalar code.
 *
 * NOTE(review): the LV_HAVE_AVX2 guard line was not present in the listing
 * this file was restored from; its name follows the LV_HAVE_AVX512F pattern
 * and should be confirmed against the build configuration.
 */

#ifdef LV_HAVE_AVX512F
#include <immintrin.h>

/*!
 * \brief Bitwise OR of two int32_t vectors using AVX-512F unaligned
 *        loads/stores (_mm512_loadu_si512 / _mm512_storeu_si512).
 *
 * \param cVector    Output vector.
 * \param aVector    First input vector.
 * \param bVector    Second input vector.
 * \param num_points Number of int32_t elements in each vector.
 */
static inline void
volk_32i_x2_or_32i_u_avx512f(int32_t* cVector, const int32_t* aVector,
                             const int32_t* bVector, unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  int32_t* cPtr = cVector;
  const int32_t* aPtr = aVector;
  const int32_t* bPtr = bVector;

  __m512i aVal, bVal, cVal;
  for(; number < sixteenthPoints; number++){
    aVal = _mm512_loadu_si512(aPtr);
    bVal = _mm512_loadu_si512(bPtr);

    cVal = _mm512_or_si512(aVal, bVal);

    _mm512_storeu_si512(cPtr, cVal);

    /* Step to the next group of 16 lanes. */
    aPtr += 16;
    bPtr += 16;
    cPtr += 16;
  }

  /* Scalar tail: the num_points % 16 elements that did not fill a vector. */
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    cVector[number] = aVector[number] | bVector[number];
  }
}
#endif /* LV_HAVE_AVX512F */


#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief Bitwise OR of two int32_t vectors using AVX2 unaligned loads/stores
 *        (_mm256_loadu_si256 / _mm256_storeu_si256).
 *
 * \param cVector    Output vector.
 * \param aVector    First input vector.
 * \param bVector    Second input vector.
 * \param num_points Number of int32_t elements in each vector.
 */
static inline void
volk_32i_x2_or_32i_u_avx2(int32_t* cVector, const int32_t* aVector,
                          const int32_t* bVector, unsigned int num_points)
{
  unsigned int number = 0;
  const unsigned int oneEightPoints = num_points / 8;

  int32_t* cPtr = cVector;
  const int32_t* aPtr = aVector;
  const int32_t* bPtr = bVector;

  __m256i aVal, bVal, cVal;
  for(; number < oneEightPoints; number++){
    aVal = _mm256_loadu_si256((const __m256i*)aPtr);
    bVal = _mm256_loadu_si256((const __m256i*)bPtr);

    cVal = _mm256_or_si256(aVal, bVal);

    _mm256_storeu_si256((__m256i*)cPtr, cVal);

    /* Step to the next group of 8 lanes. */
    aPtr += 8;
    bPtr += 8;
    cPtr += 8;
  }

  /* Scalar tail: the num_points % 8 elements that did not fill a vector. */
  number = oneEightPoints * 8;
  for(; number < num_points; number++){
    cVector[number] = aVector[number] | bVector[number];
  }
}
#endif /* LV_HAVE_AVX2 */


#endif /* INCLUDED_volk_32i_x2_or_32i_u_H */
/*
 * Doxygen cross-references (listing residue, kept for reference):
 *
 *   static void volk_32i_x2_or_32i_a_sse(int32_t *cVector, const int32_t *aVector, const int32_t *bVector, unsigned int num_points)
 *     Definition: volk_32i_x2_or_32i.h:164
 *   static void volk_32i_x2_or_32i_generic(int32_t *cVector, const int32_t *aVector, const int32_t *bVector, unsigned int num_points)
 *     Definition: volk_32i_x2_or_32i.h:231
 *   static void volk_32i_x2_or_32i_neon(int32_t *cVector, const int32_t *aVector, const int32_t *bVector, unsigned int num_points)
 *     Definition: volk_32i_x2_or_32i.h:200
 */