Vector Optimized Library of Kernels  2.0
Architecture-tuned implementations of math kernels
volk_32f_64f_add_64f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2018 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
71 #ifndef INCLUDED_volk_32f_64f_add_64f_H
72 #define INCLUDED_volk_32f_64f_add_64f_H
73 
74 #include <inttypes.h>
75 
76 
77 #ifdef LV_HAVE_GENERIC
78 
79 static inline void
80 volk_32f_64f_add_64f_generic(double *cVector, const float *aVector,
81  const double *bVector, unsigned int num_points)
82 {
83  double *cPtr = cVector;
84  const float *aPtr = aVector;
85  const double *bPtr = bVector;
86  unsigned int number = 0;
87 
88  for (number = 0; number < num_points; number++) {
89  *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
90  }
91 }
92 
93 #endif /* LV_HAVE_GENERIC */
94 
95 /*
96  * Unaligned versions
97  */
98 
99 
100 #ifdef LV_HAVE_AVX
101 
102 #include <immintrin.h>
103 #include <xmmintrin.h>
104 
105 static inline void
106 volk_32f_64f_add_64f_u_avx(double *cVector, const float *aVector,
107  const double *bVector, unsigned int num_points)
108 {
109  unsigned int number = 0;
110  const unsigned int eighth_points = num_points / 8;
111 
112  double *cPtr = cVector;
113  const float *aPtr = aVector;
114  const double *bPtr = bVector;
115 
116  __m256 aVal;
117  __m128 aVal1, aVal2;
118  __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
119  for (; number < eighth_points; number++) {
120 
121  aVal = _mm256_loadu_ps(aPtr);
122  bVal1 = _mm256_loadu_pd(bPtr);
123  bVal2 = _mm256_loadu_pd(bPtr+4);
124 
125  aVal1 = _mm256_extractf128_ps(aVal, 0);
126  aVal2 = _mm256_extractf128_ps(aVal, 1);
127 
128  aDbl1 = _mm256_cvtps_pd(aVal1);
129  aDbl2 = _mm256_cvtps_pd(aVal2);
130 
131  cVal1 = _mm256_add_pd(aDbl1, bVal1);
132  cVal2 = _mm256_add_pd(aDbl2, bVal2);
133 
134  _mm256_storeu_pd(cPtr, cVal1); // Store the results back into the C container
135  _mm256_storeu_pd(cPtr+4, cVal2); // Store the results back into the C container
136 
137  aPtr += 8;
138  bPtr += 8;
139  cPtr += 8;
140  }
141 
142  number = eighth_points * 8;
143  for (; number < num_points; number++) {
144  *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
145  }
146 }
147 
148 #endif /* LV_HAVE_AVX */
149 
150 
151 #ifdef LV_HAVE_AVX
152 
153 #include <immintrin.h>
154 #include <xmmintrin.h>
155 
156 static inline void
157 volk_32f_64f_add_64f_a_avx(double *cVector, const float *aVector,
158  const double *bVector, unsigned int num_points)
159 {
160  unsigned int number = 0;
161  const unsigned int eighth_points = num_points / 8;
162 
163  double *cPtr = cVector;
164  const float *aPtr = aVector;
165  const double *bPtr = bVector;
166 
167  __m256 aVal;
168  __m128 aVal1, aVal2;
169  __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
170  for (; number < eighth_points; number++) {
171 
172  aVal = _mm256_load_ps(aPtr);
173  bVal1 = _mm256_load_pd(bPtr);
174  bVal2 = _mm256_load_pd(bPtr+4);
175 
176  aVal1 = _mm256_extractf128_ps(aVal, 0);
177  aVal2 = _mm256_extractf128_ps(aVal, 1);
178 
179  aDbl1 = _mm256_cvtps_pd(aVal1);
180  aDbl2 = _mm256_cvtps_pd(aVal2);
181 
182  cVal1 = _mm256_add_pd(aDbl1, bVal1);
183  cVal2 = _mm256_add_pd(aDbl2, bVal2);
184 
185  _mm256_store_pd(cPtr, cVal1); // Store the results back into the C container
186  _mm256_store_pd(cPtr+4, cVal2); // Store the results back into the C container
187 
188  aPtr += 8;
189  bPtr += 8;
190  cPtr += 8;
191  }
192 
193  number = eighth_points * 8;
194  for (; number < num_points; number++) {
195  *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
196  }
197 }
198 
199 #endif /* LV_HAVE_AVX */
200 
201 
202 
203 #endif /* INCLUDED_volk_32f_64f_add_64f_u_H */
static void volk_32f_64f_add_64f_generic(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_add_64f.h:80
static void volk_32f_64f_add_64f_u_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_add_64f.h:106
static void volk_32f_64f_add_64f_a_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
Definition: volk_32f_64f_add_64f.h:157