Vector Optimized Library of Kernels  2.2
Architecture-tuned implementations of math kernels
volk_32f_64f_multiply_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2018 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_32f_64f_multiply_64f_H
72 #define INCLUDED_volk_32f_64f_multiply_64f_H
73 
74 #include <inttypes.h>
75 
76 
77 #ifdef LV_HAVE_GENERIC
78 
/*
 * Portable scalar kernel: element-wise product of a float vector and a
 * double vector, written out in double precision.
 *
 *   cVector    - output; receives num_points products
 *   aVector    - single-precision input, widened to double before multiplying
 *   bVector    - double-precision input
 *   num_points - number of elements to process; 0 leaves cVector untouched
 */
static inline void volk_32f_64f_multiply_64f_generic(double* cVector,
                                                     const float* aVector,
                                                     const double* bVector,
                                                     unsigned int num_points)
{
    unsigned int i;

    for (i = 0; i < num_points; ++i) {
        /* Widen first so the multiplication itself is done in double. */
        const double widened = (double)aVector[i];
        cVector[i] = widened * bVector[i];
    }
}
93 
94 #endif /* LV_HAVE_GENERIC */
95 
96 /*
97  * Unaligned versions
98  */
99 
100 
101 #ifdef LV_HAVE_AVX
102 
103 #include <immintrin.h>
104 #include <xmmintrin.h>
105 
/*
 * AVX kernel (unaligned): element-wise product of a float vector and a
 * double vector, written out in double precision.
 *
 * Uses the unaligned load/store intrinsics, so the buffers need no
 * particular alignment. Elements are handled eight at a time; whatever is
 * left over (num_points % 8) is finished with scalar code.
 *
 *   cVector    - output; receives num_points products
 *   aVector    - single-precision input
 *   bVector    - double-precision input
 *   num_points - number of elements to process
 */
static inline void volk_32f_64f_multiply_64f_u_avx(double* cVector,
                                                   const float* aVector,
                                                   const double* bVector,
                                                   unsigned int num_points)
{
    const unsigned int blocks = num_points / 8;
    const unsigned int vectorized = blocks * 8; /* elements covered by SIMD */
    unsigned int i;

    for (i = 0; i < vectorized; i += 8) {
        /* Fetch every input of this group before anything is stored. */
        const __m256 floats = _mm256_loadu_ps(aVector + i);
        const __m256d b_lo = _mm256_loadu_pd(bVector + i);
        const __m256d b_hi = _mm256_loadu_pd(bVector + i + 4);

        /* Split the eight floats into two halves and widen each to 4 doubles. */
        const __m256d a_lo = _mm256_cvtps_pd(_mm256_extractf128_ps(floats, 0));
        const __m256d a_hi = _mm256_cvtps_pd(_mm256_extractf128_ps(floats, 1));

        const __m256d prod_lo = _mm256_mul_pd(a_lo, b_lo);
        const __m256d prod_hi = _mm256_mul_pd(a_hi, b_hi);

        _mm256_storeu_pd(cVector + i, prod_lo);
        _mm256_storeu_pd(cVector + i + 4, prod_hi);
    }

    /* Scalar tail for the last num_points % 8 elements. */
    for (i = vectorized; i < num_points; ++i) {
        cVector[i] = ((double)aVector[i]) * bVector[i];
    }
}
149 
150 #endif /* LV_HAVE_AVX */
151 
152 
153 #ifdef LV_HAVE_AVX
154 
155 #include <immintrin.h>
156 #include <xmmintrin.h>
157 
/*
 * AVX kernel (aligned): element-wise product of a float vector and a
 * double vector, written out in double precision.
 *
 * Uses the aligned load/store intrinsics (_mm256_load_ps, _mm256_load_pd,
 * _mm256_store_pd), which require 32-byte aligned addresses; all three
 * buffers must therefore start on a 32-byte boundary. Elements are handled
 * eight at a time; whatever is left over (num_points % 8) is finished with
 * scalar code.
 *
 *   cVector    - output; receives num_points products
 *   aVector    - single-precision input
 *   bVector    - double-precision input
 *   num_points - number of elements to process
 */
static inline void volk_32f_64f_multiply_64f_a_avx(double* cVector,
                                                   const float* aVector,
                                                   const double* bVector,
                                                   unsigned int num_points)
{
    const unsigned int blocks = num_points / 8;
    const unsigned int vectorized = blocks * 8; /* elements covered by SIMD */
    unsigned int i;

    for (i = 0; i < vectorized; i += 8) {
        /* Fetch every input of this group before anything is stored. */
        const __m256 floats = _mm256_load_ps(aVector + i);
        const __m256d b_lo = _mm256_load_pd(bVector + i);
        const __m256d b_hi = _mm256_load_pd(bVector + i + 4);

        /* Split the eight floats into two halves and widen each to 4 doubles. */
        const __m256d a_lo = _mm256_cvtps_pd(_mm256_extractf128_ps(floats, 0));
        const __m256d a_hi = _mm256_cvtps_pd(_mm256_extractf128_ps(floats, 1));

        const __m256d prod_lo = _mm256_mul_pd(a_lo, b_lo);
        const __m256d prod_hi = _mm256_mul_pd(a_hi, b_hi);

        _mm256_store_pd(cVector + i, prod_lo);
        _mm256_store_pd(cVector + i + 4, prod_hi);
    }

    /* Scalar tail for the last num_points % 8 elements. */
    for (i = vectorized; i < num_points; ++i) {
        cVector[i] = ((double)aVector[i]) * bVector[i];
    }
}
201 
202 #endif /* LV_HAVE_AVX */
203 
204 
205 #endif /* INCLUDED_volk_32f_64f_multiply_64f_H */
volk_32f_64f_multiply_64f_generic
static void volk_32f_64f_multiply_64f_generic(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_multiply_64f.h:79
volk_32f_64f_multiply_64f_a_avx
static void volk_32f_64f_multiply_64f_a_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_multiply_64f.h:158
volk_32f_64f_multiply_64f_u_avx
static void volk_32f_64f_multiply_64f_u_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_multiply_64f.h:106