#ifndef INCLUDED_volk_32f_64f_add_64f_H
#define INCLUDED_volk_32f_64f_add_64f_H

/*
 * volk_32f_64f_add_64f
 *
 * Adds a single-precision vector to a double-precision vector element by
 * element and stores the double-precision sums:
 *
 *     cVector[i] = (double)aVector[i] + bVector[i],   0 <= i < num_points
 *
 * Every implementation below takes the same arguments:
 *   cVector    - output buffer, num_points doubles
 *   aVector    - input buffer, num_points floats
 *   bVector    - input buffer, num_points doubles
 *   num_points - number of elements to process; 0 performs no loads or stores
 *
 * Each implementation is compiled only when its LV_HAVE_* macro is defined.
 */

#ifdef LV_HAVE_GENERIC

/* Portable scalar implementation: one widening add per element. */
static inline void volk_32f_64f_add_64f_generic(double *cVector,
                                                const float *aVector,
                                                const double *bVector,
                                                unsigned int num_points) {
    double *cPtr = cVector;
    const float *aPtr = aVector;
    const double *bPtr = bVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
    }
}

#endif /* LV_HAVE_GENERIC */

/*
 * NOTE(review): the guard name is assumed to be LV_HAVE_NEONV8 because
 * float64x2_t / vcvt_f64_f32 only exist on AArch64 -- confirm against the
 * build configuration.
 */
#ifdef LV_HAVE_NEONV8
#include <arm_neon.h>

/* AArch64 NEON implementation: two elements per iteration, scalar tail. */
static inline void volk_32f_64f_add_64f_neon(
    double *cVector,
    const float *aVector,
    const double *bVector,
    unsigned int num_points) {
    unsigned int number = 0;
    const unsigned int half_points = num_points / 2;

    double *cPtr = cVector;
    const float *aPtr = aVector;
    const double *bPtr = bVector;

    float32x2_t aVal1;
    float64x2_t aVal, bVal, cVal;

    for (number = 0; number < half_points; number++) {
        /* Load two floats and two doubles. */
        aVal1 = vld1_f32(aPtr);
        bVal = vld1q_f64(bPtr);
#ifdef __VOLK_PREFETCH
        /* Optional hint only; skipped when volk_common.h is not in scope. */
        __VOLK_PREFETCH(aPtr + 2);
        __VOLK_PREFETCH(bPtr + 2);
#endif
        aPtr += 2;
        bPtr += 2;

        /* Widen the floats to doubles, add, and store two results. */
        aVal = vcvt_f64_f32(aVal1);
        cVal = vaddq_f64(aVal, bVal);
        vst1q_f64(cPtr, cVal);
        cPtr += 2;
    }

    /* At most one element is left when num_points is odd. */
    number = half_points * 2;
    for (; number < num_points; number++) {
        *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
    }
}

#endif /* LV_HAVE_NEONV8 */

/* NOTE(review): guard name assumed from the _u_avx/_a_avx suffixes -- confirm. */
#ifdef LV_HAVE_AVX

#include <immintrin.h>
#include <xmmintrin.h>

/*
 * AVX implementation for buffers with no alignment guarantee (unaligned
 * loads/stores): eight elements per iteration, scalar tail.
 */
static inline void volk_32f_64f_add_64f_u_avx(double *cVector,
                                              const float *aVector,
                                              const double *bVector,
                                              unsigned int num_points) {
    unsigned int number = 0;
    const unsigned int eighth_points = num_points / 8;

    double *cPtr = cVector;
    const float *aPtr = aVector;
    const double *bPtr = bVector;

    __m256 aVal;
    __m128 aVal1, aVal2;
    __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
    for (; number < eighth_points; number++) {
        /* Eight floats fit one register; eight doubles need two. */
        aVal = _mm256_loadu_ps(aPtr);
        bVal1 = _mm256_loadu_pd(bPtr);
        bVal2 = _mm256_loadu_pd(bPtr + 4);

        /* Split the floats into low/high halves so each can be widened. */
        aVal1 = _mm256_extractf128_ps(aVal, 0);
        aVal2 = _mm256_extractf128_ps(aVal, 1);

        aDbl1 = _mm256_cvtps_pd(aVal1);
        aDbl2 = _mm256_cvtps_pd(aVal2);

        cVal1 = _mm256_add_pd(aDbl1, bVal1);
        cVal2 = _mm256_add_pd(aDbl2, bVal2);

        _mm256_storeu_pd(cPtr, cVal1);
        _mm256_storeu_pd(cPtr + 4, cVal2);

        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Up to seven leftover elements are handled one at a time. */
    number = eighth_points * 8;
    for (; number < num_points; number++) {
        *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
    }
}

#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_AVX

#include <immintrin.h>
#include <xmmintrin.h>

/*
 * AVX implementation using aligned loads/stores: all three buffers must be
 * 32-byte aligned. Eight elements per iteration, scalar tail.
 */
static inline void volk_32f_64f_add_64f_a_avx(double *cVector,
                                              const float *aVector,
                                              const double *bVector,
                                              unsigned int num_points) {
    unsigned int number = 0;
    const unsigned int eighth_points = num_points / 8;

    double *cPtr = cVector;
    const float *aPtr = aVector;
    const double *bPtr = bVector;

    __m256 aVal;
    __m128 aVal1, aVal2;
    __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
    for (; number < eighth_points; number++) {
        /* Eight floats fit one register; eight doubles need two. */
        aVal = _mm256_load_ps(aPtr);
        bVal1 = _mm256_load_pd(bPtr);
        bVal2 = _mm256_load_pd(bPtr + 4);

        /* Split the floats into low/high halves so each can be widened. */
        aVal1 = _mm256_extractf128_ps(aVal, 0);
        aVal2 = _mm256_extractf128_ps(aVal, 1);

        aDbl1 = _mm256_cvtps_pd(aVal1);
        aDbl2 = _mm256_cvtps_pd(aVal2);

        cVal1 = _mm256_add_pd(aDbl1, bVal1);
        cVal2 = _mm256_add_pd(aDbl2, bVal2);

        _mm256_store_pd(cPtr, cVal1);
        _mm256_store_pd(cPtr + 4, cVal2);

        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Up to seven leftover elements are handled one at a time. */
    number = eighth_points * 8;
    for (; number < num_points; number++) {
        *cPtr++ = ((double)(*aPtr++)) + (*bPtr++);
    }
}

#endif /* LV_HAVE_AVX */

#endif /* INCLUDED_volk_32f_64f_add_64f_H */
/*
 * Doxygen cross-references for the symbols used in this listing:
 *
 *   #define __VOLK_PREFETCH(addr)
 *       Definition: volk_common.h:52
 *
 *   static void volk_32f_64f_add_64f_generic(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
 *       Definition: volk_32f_64f_add_64f.h:80
 *
 *   static void volk_32f_64f_add_64f_u_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
 *       Definition: volk_32f_64f_add_64f.h:144
 *
 *   static void volk_32f_64f_add_64f_a_avx(double *cVector, const float *aVector, const double *bVector, unsigned int num_points)
 *       Definition: volk_32f_64f_add_64f.h:196
 */