Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_real_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H
72 #define INCLUDED_volk_32fc_deinterleave_real_64f_a_H
73 
74 #include <inttypes.h>
75 #include <stdio.h>
76 
77 #ifdef LV_HAVE_AVX2
78 #include <immintrin.h>
79 
80 static inline void
81 volk_32fc_deinterleave_real_64f_a_avx2(double* iBuffer, const lv_32fc_t* complexVector,
82  unsigned int num_points)
83 {
84  unsigned int number = 0;
85 
86  const float* complexVectorPtr = (float*)complexVector;
87  double* iBufferPtr = iBuffer;
88 
89  const unsigned int quarterPoints = num_points / 4;
90  __m256 cplxValue;
91  __m128 fVal;
92  __m256d dVal;
93  __m256i idx = _mm256_set_epi32(0,0,0,0,6,4,2,0);
94  for(;number < quarterPoints; number++){
95 
96  cplxValue = _mm256_load_ps(complexVectorPtr);
97  complexVectorPtr += 8;
98 
99  // Arrange in i1i2i1i2 format
100  cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
101  fVal = _mm256_extractf128_ps(cplxValue, 0);
102  dVal = _mm256_cvtps_pd(fVal);
103  _mm256_store_pd(iBufferPtr, dVal);
104 
105  iBufferPtr += 4;
106  }
107 
108  number = quarterPoints * 4;
109  for(; number < num_points; number++){
110  *iBufferPtr++ = (double)*complexVectorPtr++;
111  complexVectorPtr++;
112  }
113 }
114 #endif /* LV_HAVE_AVX2 */
115 
116 #ifdef LV_HAVE_SSE2
117 #include <emmintrin.h>
118 
119 static inline void
120 volk_32fc_deinterleave_real_64f_a_sse2(double* iBuffer, const lv_32fc_t* complexVector,
121  unsigned int num_points)
122 {
123  unsigned int number = 0;
124 
125  const float* complexVectorPtr = (float*)complexVector;
126  double* iBufferPtr = iBuffer;
127 
128  const unsigned int halfPoints = num_points / 2;
129  __m128 cplxValue, fVal;
130  __m128d dVal;
131  for(;number < halfPoints; number++){
132 
133  cplxValue = _mm_load_ps(complexVectorPtr);
134  complexVectorPtr += 4;
135 
136  // Arrange in i1i2i1i2 format
137  fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2,0,2,0));
138  dVal = _mm_cvtps_pd(fVal);
139  _mm_store_pd(iBufferPtr, dVal);
140 
141  iBufferPtr += 2;
142  }
143 
144  number = halfPoints * 2;
145  for(; number < num_points; number++){
146  *iBufferPtr++ = (double)*complexVectorPtr++;
147  complexVectorPtr++;
148  }
149 }
150 #endif /* LV_HAVE_SSE2 */
151 
152 
153 #ifdef LV_HAVE_GENERIC
154 
155 static inline void
156 volk_32fc_deinterleave_real_64f_generic(double* iBuffer, const lv_32fc_t* complexVector,
157  unsigned int num_points)
158 {
159  unsigned int number = 0;
160  const float* complexVectorPtr = (float*)complexVector;
161  double* iBufferPtr = iBuffer;
162  for(number = 0; number < num_points; number++){
163  *iBufferPtr++ = (double)*complexVectorPtr++;
164  complexVectorPtr++;
165  }
166 }
167 #endif /* LV_HAVE_GENERIC */
168 
169 #endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a_H */
170 
171 
172 #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_u_H
173 #define INCLUDED_volk_32fc_deinterleave_real_64f_u_H
174 
175 #include <inttypes.h>
176 #include <stdio.h>
177 
178 #ifdef LV_HAVE_AVX2
179 #include <immintrin.h>
180 
181 static inline void
182 volk_32fc_deinterleave_real_64f_u_avx2(double* iBuffer, const lv_32fc_t* complexVector,
183  unsigned int num_points)
184 {
185  unsigned int number = 0;
186 
187  const float* complexVectorPtr = (float*)complexVector;
188  double* iBufferPtr = iBuffer;
189 
190  const unsigned int quarterPoints = num_points / 4;
191  __m256 cplxValue;
192  __m128 fVal;
193  __m256d dVal;
194  __m256i idx = _mm256_set_epi32(0,0,0,0,6,4,2,0);
195  for(;number < quarterPoints; number++){
196 
197  cplxValue = _mm256_loadu_ps(complexVectorPtr);
198  complexVectorPtr += 8;
199 
200  // Arrange in i1i2i1i2 format
201  cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
202  fVal = _mm256_extractf128_ps(cplxValue, 0);
203  dVal = _mm256_cvtps_pd(fVal);
204  _mm256_storeu_pd(iBufferPtr, dVal);
205 
206  iBufferPtr += 4;
207  }
208 
209  number = quarterPoints * 4;
210  for(; number < num_points; number++){
211  *iBufferPtr++ = (double)*complexVectorPtr++;
212  complexVectorPtr++;
213  }
214 }
215 #endif /* LV_HAVE_AVX2 */
216 
217 #endif /* INCLUDED_volk_32fc_deinterleave_real_64f_u_H */
218 
219 
static void volk_32fc_deinterleave_real_64f_a_sse2(double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_64f.h:120
float complex lv_32fc_t
Definition: volk_complex.h:61
static void volk_32fc_deinterleave_real_64f_generic(double *iBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_real_64f.h:156