Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_64f_x2_multiply_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2018 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_64f_x2_multiply_64f_H
72 #define INCLUDED_volk_64f_x2_multiply_64f_H
73 
74 #include <inttypes.h>
75 
76 
77 #ifdef LV_HAVE_GENERIC
78 
/**
 * Portable element-wise product of two double-precision vectors.
 *
 * For every index i in [0, num_points) writes aVector[i] * bVector[i]
 * into cVector[i]. Nothing is read or written when num_points is 0.
 *
 * @param cVector    destination buffer, at least num_points elements
 * @param aVector    first factor, at least num_points elements
 * @param bVector    second factor, at least num_points elements
 * @param num_points number of elements to process
 */
static inline void
volk_64f_x2_multiply_64f_generic(double *cVector, const double *aVector,
                                 const double *bVector, unsigned int num_points)
{
    unsigned int i;

    for (i = 0; i < num_points; ++i) {
        cVector[i] = aVector[i] * bVector[i];
    }
}
92 
93 #endif /* LV_HAVE_GENERIC */
94 
95 /*
96  * Unaligned versions
97  */
98 
99 #ifdef LV_HAVE_SSE2
100 
101 #include <emmintrin.h>
102 
/**
 * SSE2 element-wise product of two double-precision vectors using
 * unaligned loads and stores.
 *
 * Elements are processed two at a time with 128-bit registers; if
 * num_points is odd, the final element is handled by a scalar multiply.
 *
 * @param cVector    destination buffer, at least num_points elements
 * @param aVector    first factor, at least num_points elements
 * @param bVector    second factor, at least num_points elements
 * @param num_points number of elements to process
 */
static inline void
volk_64f_x2_multiply_64f_u_sse2(double *cVector, const double *aVector,
                                const double *bVector, unsigned int num_points)
{
    /* Number of elements covered by whole two-wide vector steps. */
    const unsigned int simd_end = num_points - (num_points % 2);
    unsigned int i = 0;

    for (; i < simd_end; i += 2) {
        const __m128d lhs = _mm_loadu_pd(aVector + i);
        const __m128d rhs = _mm_loadu_pd(bVector + i);

        _mm_storeu_pd(cVector + i, _mm_mul_pd(lhs, rhs));
    }

    /* Scalar clean-up for the element left over by the vector loop. */
    for (; i < num_points; ++i) {
        cVector[i] = aVector[i] * bVector[i];
    }
}
133 
134 #endif /* LV_HAVE_SSE2 */
135 
136 
137 #ifdef LV_HAVE_AVX
138 
139 #include <immintrin.h>
140 
/**
 * AVX element-wise product of two double-precision vectors using
 * unaligned loads and stores.
 *
 * Elements are processed four at a time with 256-bit registers; the
 * up-to-three elements that remain are handled by scalar multiplies.
 *
 * @param cVector    destination buffer, at least num_points elements
 * @param aVector    first factor, at least num_points elements
 * @param bVector    second factor, at least num_points elements
 * @param num_points number of elements to process
 */
static inline void
volk_64f_x2_multiply_64f_u_avx(double *cVector, const double *aVector,
                               const double *bVector, unsigned int num_points)
{
    /* Number of elements covered by whole four-wide vector steps. */
    const unsigned int simd_end = num_points - (num_points % 4);
    unsigned int i = 0;

    for (; i < simd_end; i += 4) {
        const __m256d lhs = _mm256_loadu_pd(aVector + i);
        const __m256d rhs = _mm256_loadu_pd(bVector + i);

        _mm256_storeu_pd(cVector + i, _mm256_mul_pd(lhs, rhs));
    }

    /* Scalar clean-up for the elements left over by the vector loop. */
    for (; i < num_points; ++i) {
        cVector[i] = aVector[i] * bVector[i];
    }
}
172 
173 #endif /* LV_HAVE_AVX */
174 
175 /*
176  * Aligned versions
177  */
178 
179 #ifdef LV_HAVE_SSE2
180 
181 #include <emmintrin.h>
182 
/**
 * SSE2 element-wise product of two double-precision vectors using
 * aligned loads and stores.
 *
 * Elements are processed two at a time with 128-bit registers; if
 * num_points is odd, the final element is handled by a scalar multiply.
 * The aligned intrinsics used here (_mm_load_pd / _mm_store_pd) require
 * all three buffers to start on a 16-byte boundary.
 *
 * @param cVector    destination buffer, at least num_points elements
 * @param aVector    first factor, at least num_points elements
 * @param bVector    second factor, at least num_points elements
 * @param num_points number of elements to process
 */
static inline void
volk_64f_x2_multiply_64f_a_sse2(double *cVector, const double *aVector,
                                const double *bVector, unsigned int num_points)
{
    /* Number of elements covered by whole two-wide vector steps. */
    const unsigned int simd_end = num_points - (num_points % 2);
    unsigned int i = 0;

    for (; i < simd_end; i += 2) {
        const __m128d lhs = _mm_load_pd(aVector + i);
        const __m128d rhs = _mm_load_pd(bVector + i);

        _mm_store_pd(cVector + i, _mm_mul_pd(lhs, rhs));
    }

    /* Scalar clean-up for the element left over by the vector loop. */
    for (; i < num_points; ++i) {
        cVector[i] = aVector[i] * bVector[i];
    }
}
213 
214 #endif /* LV_HAVE_SSE2 */
215 
216 
217 #ifdef LV_HAVE_AVX
218 
219 #include <immintrin.h>
220 
/**
 * AVX element-wise product of two double-precision vectors using
 * aligned loads and stores.
 *
 * Elements are processed four at a time with 256-bit registers; the
 * up-to-three elements that remain are handled by scalar multiplies.
 * The aligned intrinsics used here (_mm256_load_pd / _mm256_store_pd)
 * require all three buffers to start on a 32-byte boundary.
 *
 * @param cVector    destination buffer, at least num_points elements
 * @param aVector    first factor, at least num_points elements
 * @param bVector    second factor, at least num_points elements
 * @param num_points number of elements to process
 */
static inline void
volk_64f_x2_multiply_64f_a_avx(double *cVector, const double *aVector,
                               const double *bVector, unsigned int num_points)
{
    /* Number of elements covered by whole four-wide vector steps. */
    const unsigned int simd_end = num_points - (num_points % 4);
    unsigned int i = 0;

    for (; i < simd_end; i += 4) {
        const __m256d lhs = _mm256_load_pd(aVector + i);
        const __m256d rhs = _mm256_load_pd(bVector + i);

        _mm256_store_pd(cVector + i, _mm256_mul_pd(lhs, rhs));
    }

    /* Scalar clean-up for the elements left over by the vector loop. */
    for (; i < num_points; ++i) {
        cVector[i] = aVector[i] * bVector[i];
    }
}
252 
253 #endif /* LV_HAVE_AVX */
254 
255 #endif /* INCLUDED_volk_64f_x2_multiply_64f_H */
static void volk_64f_x2_multiply_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_multiply_64f.h:184
static void volk_64f_x2_multiply_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_multiply_64f.h:222
static void volk_64f_x2_multiply_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_multiply_64f.h:80
static void volk_64f_x2_multiply_64f_u_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_multiply_64f.h:104
static void volk_64f_x2_multiply_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_multiply_64f.h:142