75 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H 76 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H 83 #include <immintrin.h> 86 volk_32f_x2_s32f_interleave_16ic_a_avx2(
lv_16sc_t* complexVector,
const float* iBuffer,
87 const float* qBuffer,
const float scalar,
unsigned int num_points)
89 unsigned int number = 0;
90 const float* iBufferPtr = iBuffer;
91 const float* qBufferPtr = qBuffer;
93 __m256 vScalar = _mm256_set1_ps(scalar);
95 const unsigned int eighthPoints = num_points / 8;
97 __m256 iValue, qValue, cplxValue1, cplxValue2;
98 __m256i intValue1, intValue2;
100 int16_t* complexVectorPtr = (int16_t*)complexVector;
102 for(;number < eighthPoints; number++){
103 iValue = _mm256_load_ps(iBufferPtr);
104 qValue = _mm256_load_ps(qBufferPtr);
107 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
108 cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
111 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
112 cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
114 intValue1 = _mm256_cvtps_epi32(cplxValue1);
115 intValue2 = _mm256_cvtps_epi32(cplxValue2);
117 intValue1 = _mm256_packs_epi32(intValue1, intValue2);
119 _mm256_store_si256((__m256i*)complexVectorPtr, intValue1);
120 complexVectorPtr += 16;
126 number = eighthPoints * 8;
127 complexVectorPtr = (int16_t*)(&complexVector[number]);
128 for(; number < num_points; number++){
129 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
130 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
137 #include <emmintrin.h> 141 const float* qBuffer,
const float scalar,
unsigned int num_points)
143 unsigned int number = 0;
144 const float* iBufferPtr = iBuffer;
145 const float* qBufferPtr = qBuffer;
147 __m128 vScalar = _mm_set_ps1(scalar);
149 const unsigned int quarterPoints = num_points / 4;
151 __m128 iValue, qValue, cplxValue1, cplxValue2;
152 __m128i intValue1, intValue2;
154 int16_t* complexVectorPtr = (int16_t*)complexVector;
156 for(;number < quarterPoints; number++){
157 iValue = _mm_load_ps(iBufferPtr);
158 qValue = _mm_load_ps(qBufferPtr);
161 cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
162 cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
165 cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
166 cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
168 intValue1 = _mm_cvtps_epi32(cplxValue1);
169 intValue2 = _mm_cvtps_epi32(cplxValue2);
171 intValue1 = _mm_packs_epi32(intValue1, intValue2);
173 _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
174 complexVectorPtr += 8;
180 number = quarterPoints * 4;
181 complexVectorPtr = (int16_t*)(&complexVector[number]);
182 for(; number < num_points; number++){
183 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
184 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
191 #include <xmmintrin.h> 195 const float* qBuffer,
const float scalar,
unsigned int num_points)
197 unsigned int number = 0;
198 const float* iBufferPtr = iBuffer;
199 const float* qBufferPtr = qBuffer;
201 __m128 vScalar = _mm_set_ps1(scalar);
203 const unsigned int quarterPoints = num_points / 4;
205 __m128 iValue, qValue, cplxValue;
207 int16_t* complexVectorPtr = (int16_t*)complexVector;
211 for(;number < quarterPoints; number++){
212 iValue = _mm_load_ps(iBufferPtr);
213 qValue = _mm_load_ps(qBufferPtr);
216 cplxValue = _mm_unpacklo_ps(iValue, qValue);
217 cplxValue = _mm_mul_ps(cplxValue, vScalar);
219 _mm_store_ps(floatBuffer, cplxValue);
221 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[0]);
222 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[1]);
223 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[2]);
224 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[3]);
227 cplxValue = _mm_unpackhi_ps(iValue, qValue);
228 cplxValue = _mm_mul_ps(cplxValue, vScalar);
230 _mm_store_ps(floatBuffer, cplxValue);
232 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[0]);
233 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[1]);
234 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[2]);
235 *complexVectorPtr++ = (int16_t)
rintf(floatBuffer[3]);
241 number = quarterPoints * 4;
242 complexVectorPtr = (int16_t*)(&complexVector[number]);
243 for(; number < num_points; number++){
244 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
245 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
251 #ifdef LV_HAVE_GENERIC 255 const float* qBuffer,
const float scalar,
unsigned int num_points)
257 int16_t* complexVectorPtr = (int16_t*)complexVector;
258 const float* iBufferPtr = iBuffer;
259 const float* qBufferPtr = qBuffer;
260 unsigned int number = 0;
262 for(number = 0; number < num_points; number++){
263 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
264 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
272 #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H 273 #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H 276 #include <inttypes.h> 280 #include <immintrin.h> 283 volk_32f_x2_s32f_interleave_16ic_u_avx2(
lv_16sc_t* complexVector,
const float* iBuffer,
284 const float* qBuffer,
const float scalar,
unsigned int num_points)
286 unsigned int number = 0;
287 const float* iBufferPtr = iBuffer;
288 const float* qBufferPtr = qBuffer;
290 __m256 vScalar = _mm256_set1_ps(scalar);
292 const unsigned int eighthPoints = num_points / 8;
294 __m256 iValue, qValue, cplxValue1, cplxValue2;
295 __m256i intValue1, intValue2;
297 int16_t* complexVectorPtr = (int16_t*)complexVector;
299 for(;number < eighthPoints; number++){
300 iValue = _mm256_loadu_ps(iBufferPtr);
301 qValue = _mm256_loadu_ps(qBufferPtr);
304 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
305 cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
308 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
309 cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
311 intValue1 = _mm256_cvtps_epi32(cplxValue1);
312 intValue2 = _mm256_cvtps_epi32(cplxValue2);
314 intValue1 = _mm256_packs_epi32(intValue1, intValue2);
316 _mm256_storeu_si256((__m256i*)complexVectorPtr, intValue1);
317 complexVectorPtr += 16;
323 number = eighthPoints * 8;
324 complexVectorPtr = (int16_t*)(&complexVector[number]);
325 for(; number < num_points; number++){
326 *complexVectorPtr++ = (int16_t)
rintf(*iBufferPtr++ * scalar);
327 *complexVectorPtr++ = (int16_t)
rintf(*qBufferPtr++ * scalar);
/*
 * Cross-reference entries carried over from the generated source listing:
 *
 *   static void volk_32f_x2_s32f_interleave_16ic_generic(lv_16sc_t *complexVector, const float *iBuffer, const float *qBuffer, const float scalar, unsigned int num_points)
 *     Definition: volk_32f_x2_s32f_interleave_16ic.h:254
 *
 *   short complex lv_16sc_t
 *     Definition: volk_complex.h:58
 *
 *   static float rintf(float x)
 *     Definition: config.h:31
 *
 *   static void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t *complexVector, const float *iBuffer, const float *qBuffer, const float scalar, unsigned int num_points)
 *     Definition: volk_32f_x2_s32f_interleave_16ic.h:140
 *
 *   static void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t *complexVector, const float *iBuffer, const float *qBuffer, const float scalar, unsigned int num_points)
 *     Definition: volk_32f_x2_s32f_interleave_16ic.h:194
 *
 *   #define __VOLK_ATTR_ALIGNED(x)
 *     Definition: volk_common.h:33
 */