73 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
74 #define INCLUDED_volk_32f_x2_interleave_32fc_a_H
80 #include <immintrin.h>
/*
 * Fragment of an AVX interleave routine built on aligned 256-bit loads and
 * stores. The start of the signature (return type, function name and the
 * leading parameters) is not visible in this listing; only the last
 * parameter appears below.
 *
 * Visible behaviour: for each full group of 8 points, 8 floats are loaded
 * from iBufferPtr and 8 from qBufferPtr, and 16 floats are stored through
 * complexVectorPtr in the order i0 q0 i1 q1 ... i7 q7. The remaining
 * (num_points % 8) points are written one pair at a time by a scalar loop.
 *
 * num_points: number of (i, q) pairs to write.
 *
 * NOTE(review): the numbers embedded at the start of each line skip values
 * (for example 110 -> 116), and neither the closing braces nor any advance
 * of iBufferPtr / qBufferPtr inside the vector loop are visible here. The
 * listing appears to omit lines; confirm against the complete file.
 */
85 unsigned int num_points)
87 unsigned int number = 0;
/* The output is addressed as a flat float array: one float from the i input
 * followed by one float from the q input for every point. */
88 float* complexVectorPtr = (
float*)complexVector;
89 const float* iBufferPtr = iBuffer;
90 const float* qBufferPtr = qBuffer;
/* Number of full 8-point groups handled by the vector loop. */
92 const uint64_t eighthPoints = num_points / 8;
94 __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
95 for (; number < eighthPoints; number++) {
/* Aligned loads: _mm256_load_ps requires a 32-byte aligned address. */
96 iValue = _mm256_load_ps(iBufferPtr);
97 qValue = _mm256_load_ps(qBufferPtr);
/* unpacklo works per 128-bit lane: i0 q0 i1 q1 | i4 q4 i5 q5 */
100 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
/* unpackhi works per 128-bit lane: i2 q2 i3 q3 | i6 q6 i7 q7 */
102 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
/* 0x20 joins the low lanes of both operands: i0 q0 i1 q1 i2 q2 i3 q3 */
104 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
105 _mm256_store_ps(complexVectorPtr, cplxValue);
106 complexVectorPtr += 8;
/* 0x31 joins the high lanes of both operands: i4 q4 i5 q5 i6 q6 i7 q7 */
108 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
109 _mm256_store_ps(complexVectorPtr, cplxValue);
110 complexVectorPtr += 8;
/* NOTE(review): no increment of iBufferPtr / qBufferPtr is visible before
 * this point; the tail loop below is only correct if both were advanced by
 * 8 per vector iteration - verify in the full source. */
/* Resume at the first point not covered by a full group of 8. */
116 number = eighthPoints * 8;
117 for (; number < num_points; number++) {
/* Scalar tail: one i sample, then one q sample, per point. */
118 *complexVectorPtr++ = *iBufferPtr++;
119 *complexVectorPtr++ = *qBufferPtr++;
126 #include <xmmintrin.h>
/*
 * Fragment of an SSE interleave routine built on aligned 128-bit loads and
 * stores. The line carrying the return type, function name and first
 * parameter is not visible in this listing.
 *
 * Visible behaviour: for each full group of 4 points, 4 floats are loaded
 * from iBufferPtr and 4 from qBufferPtr, and 8 floats are stored through
 * complexVectorPtr in the order i0 q0 i1 q1 i2 q2 i3 q3. The remaining
 * (num_points % 4) points are written by a scalar loop.
 *
 * iBuffer:    source of the first float of every output pair.
 * qBuffer:    source of the second float of every output pair.
 * num_points: number of (i, q) pairs to write.
 *
 * NOTE(review): the embedded line numbers skip values (for example
 * 153 -> 159), and neither the closing braces nor any advance of
 * iBufferPtr / qBufferPtr inside the vector loop are visible here. The
 * listing appears to omit lines; confirm against the complete file.
 */
129 const float* iBuffer,
130 const float* qBuffer,
131 unsigned int num_points)
133 unsigned int number = 0;
/* The output is addressed as a flat float array of alternating i and q
 * values. */
134 float* complexVectorPtr = (
float*)complexVector;
135 const float* iBufferPtr = iBuffer;
136 const float* qBufferPtr = qBuffer;
/* Number of full 4-point groups handled by the vector loop. */
138 const uint64_t quarterPoints = num_points / 4;
140 __m128 iValue, qValue, cplxValue;
141 for (; number < quarterPoints; number++) {
/* Aligned loads: _mm_load_ps requires a 16-byte aligned address. */
142 iValue = _mm_load_ps(iBufferPtr);
143 qValue = _mm_load_ps(qBufferPtr);
/* Low halves interleaved: i0 q0 i1 q1 */
146 cplxValue = _mm_unpacklo_ps(iValue, qValue);
147 _mm_store_ps(complexVectorPtr, cplxValue);
148 complexVectorPtr += 4;
/* High halves interleaved: i2 q2 i3 q3 */
151 cplxValue = _mm_unpackhi_ps(iValue, qValue);
152 _mm_store_ps(complexVectorPtr, cplxValue);
153 complexVectorPtr += 4;
/* NOTE(review): no increment of iBufferPtr / qBufferPtr is visible before
 * this point; the tail loop below is only correct if both were advanced by
 * 4 per vector iteration - verify in the full source. */
/* Resume at the first point not covered by a full group of 4. */
159 number = quarterPoints * 4;
160 for (; number < num_points; number++) {
/* Scalar tail: one i sample, then one q sample, per point. */
161 *complexVectorPtr++ = *iBufferPtr++;
162 *complexVectorPtr++ = *qBufferPtr++;
169 #include <arm_neon.h>
/*
 * Fragment of a NEON interleave routine. The line carrying the return type,
 * function name and first parameter is not visible in this listing.
 *
 * Visible behaviour: for each full group of 4 points, 4 floats are loaded
 * from iBuffer and 4 from qBuffer, and a single vst2q_f32 stores them as
 * 8 alternating floats (i0 q0 i1 q1 i2 q2 i3 q3). The remaining
 * (num_points % 4) points are written by a scalar loop. Unlike the x86
 * fragments in this file, the tail loop advances the iBuffer / qBuffer
 * parameters themselves rather than local copies.
 *
 * iBuffer:    source of the first float of every output pair.
 * qBuffer:    source of the second float of every output pair.
 * num_points: number of (i, q) pairs to write.
 *
 * NOTE(review): the embedded line numbers skip values (for example
 * 176 -> 178 and 184 -> 187). The declaration of `number`, any advance of
 * iBuffer / qBuffer inside the vector loop, and the closing braces are not
 * visible here. The listing appears to omit lines; confirm against the
 * complete file.
 */
172 const float* iBuffer,
173 const float* qBuffer,
174 unsigned int num_points)
/* Number of full 4-point groups handled by the vector loop. */
176 unsigned int quarter_points = num_points / 4;
/* The output is addressed as a flat float array of alternating i and q
 * values. */
178 float* complexVectorPtr = (
float*)complexVector;
/* val[0] holds the i samples and val[1] the q samples for one group. */
180 float32x4x2_t complex_vec;
181 for (number = 0; number < quarter_points; ++number) {
182 complex_vec.val[0] = vld1q_f32(iBuffer);
183 complex_vec.val[1] = vld1q_f32(qBuffer);
/* vst2q_f32 writes val[0][k], val[1][k] alternately, i.e. 8 floats. */
184 vst2q_f32(complexVectorPtr, complex_vec);
187 complexVectorPtr += 8;
/* Scalar tail starting at the first point not covered by a full group. */
190 for (number = quarter_points * 4; number < num_points; ++number) {
191 *complexVectorPtr++ = *iBuffer++;
192 *complexVectorPtr++ = *qBuffer++;
198 #ifdef LV_HAVE_GENERIC
/*
 * Fragment of the portable scalar interleave routine. The line carrying the
 * return type, function name and first parameter is not visible in this
 * listing.
 *
 * Visible behaviour: for each of the num_points points, one float is copied
 * from the i input and then one from the q input into consecutive output
 * slots.
 *
 * iBuffer:    source of the first float of every output pair.
 * qBuffer:    source of the second float of every output pair.
 * num_points: number of (i, q) pairs to write.
 *
 * NOTE(review): the embedded line numbers skip values (207 -> 210). The
 * declaration of `number` and the closing braces are not visible here. The
 * listing appears to omit lines; confirm against the complete file.
 */
201 const float* iBuffer,
202 const float* qBuffer,
203 unsigned int num_points)
/* The output is addressed as a flat float array of alternating i and q
 * values. */
205 float* complexVectorPtr = (
float*)complexVector;
206 const float* iBufferPtr = iBuffer;
207 const float* qBufferPtr = qBuffer;
210 for (number = 0; number < num_points; number++) {
/* One i sample, then one q sample, per point. */
211 *complexVectorPtr++ = *iBufferPtr++;
212 *complexVectorPtr++ = *qBufferPtr++;
220 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_u_H
221 #define INCLUDED_volk_32f_x2_interleave_32fc_u_H
223 #include <inttypes.h>
227 #include <immintrin.h>
/*
 * Fragment of an AVX interleave routine built on unaligned 256-bit loads
 * and stores (loadu / storeu), so no alignment of the buffers is required
 * by the intrinsics used. The line carrying the return type, function name
 * and first parameter is not visible in this listing.
 *
 * Visible behaviour: for each full group of 8 points, 8 floats are loaded
 * from iBufferPtr and 8 from qBufferPtr, and 16 floats are stored through
 * complexVectorPtr in the order i0 q0 i1 q1 ... i7 q7. The remaining
 * (num_points % 8) points are written by a scalar loop.
 *
 * iBuffer:    source of the first float of every output pair.
 * qBuffer:    source of the second float of every output pair.
 * num_points: number of (i, q) pairs to write.
 *
 * NOTE(review): the embedded line numbers skip values (for example
 * 257 -> 263), and neither the closing braces nor any advance of
 * iBufferPtr / qBufferPtr inside the vector loop are visible here. The
 * listing appears to omit lines, and the block may continue past the end of
 * this view; confirm against the complete file.
 */
230 const float* iBuffer,
231 const float* qBuffer,
232 unsigned int num_points)
234 unsigned int number = 0;
/* The output is addressed as a flat float array of alternating i and q
 * values. */
235 float* complexVectorPtr = (
float*)complexVector;
236 const float* iBufferPtr = iBuffer;
237 const float* qBufferPtr = qBuffer;
/* Number of full 8-point groups handled by the vector loop. */
239 const uint64_t eighthPoints = num_points / 8;
241 __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
242 for (; number < eighthPoints; number++) {
/* Unaligned loads of 8 floats each. */
243 iValue = _mm256_loadu_ps(iBufferPtr);
244 qValue = _mm256_loadu_ps(qBufferPtr);
/* unpacklo works per 128-bit lane: i0 q0 i1 q1 | i4 q4 i5 q5 */
247 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
/* unpackhi works per 128-bit lane: i2 q2 i3 q3 | i6 q6 i7 q7 */
249 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
/* 0x20 joins the low lanes of both operands: i0 q0 i1 q1 i2 q2 i3 q3 */
251 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
252 _mm256_storeu_ps(complexVectorPtr, cplxValue);
253 complexVectorPtr += 8;
/* 0x31 joins the high lanes of both operands: i4 q4 i5 q5 i6 q6 i7 q7 */
255 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
256 _mm256_storeu_ps(complexVectorPtr, cplxValue);
257 complexVectorPtr += 8;
/* NOTE(review): no increment of iBufferPtr / qBufferPtr is visible before
 * this point; the tail loop below is only correct if both were advanced by
 * 8 per vector iteration - verify in the full source. */
/* Resume at the first point not covered by a full group of 8. */
263 number = eighthPoints * 8;
264 for (; number < num_points; number++) {
/* Scalar tail: one i sample, then one q sample, per point. */
265 *complexVectorPtr++ = *iBufferPtr++;
266 *complexVectorPtr++ = *qBufferPtr++;