Vector Optimized Library of Kernels  2.1
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_64f_x2.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
73 #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
74 #define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
75 
76 #include <inttypes.h>
77 #include <stdio.h>
78 
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/**
 * Deinterleaves a complex float vector into two double-precision buffers
 * using AVX with unaligned loads and stores.
 *
 * The input is read as consecutive float pairs; the first float of each pair
 * is widened to double and written to iBuffer, the second to qBuffer.
 *
 * \param iBuffer       Output buffer for the first component of each sample
 *                      (num_points doubles are written).
 * \param qBuffer       Output buffer for the second component of each sample
 *                      (num_points doubles are written).
 * \param complexVector Input samples, read as 2 * num_points floats.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_64f_x2_u_avx(double *iBuffer, double *qBuffer,
                                    const lv_32fc_t *complexVector,
                                    unsigned int num_points) {
    unsigned int number = 0;

    /* Keep the const qualifier when viewing the samples as raw floats. */
    const float *complexVectorPtr = (const float *)complexVector;
    double *iBufferPtr = iBuffer;
    double *qBufferPtr = qBuffer;

    /* Each vector iteration consumes four complex samples (eight floats). */
    const unsigned int quarterPoints = num_points / 4;
    __m256 cplxValue;
    __m128 complexH, complexL, fVal;
    __m256d dVal;

    for (; number < quarterPoints; number++) {

        cplxValue = _mm256_loadu_ps(complexVectorPtr);
        complexVectorPtr += 8;

        /* complexL = i0 q0 i1 q1, complexH = i2 q2 i3 q3 */
        complexH = _mm256_extractf128_ps(cplxValue, 1);
        complexL = _mm256_extractf128_ps(cplxValue, 0);

        /* Even lanes of both halves: i0 i1 i2 i3 */
        fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
        dVal = _mm256_cvtps_pd(fVal);
        _mm256_storeu_pd(iBufferPtr, dVal);

        /* Odd lanes of both halves: q0 q1 q2 q3 */
        fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
        dVal = _mm256_cvtps_pd(fVal);
        _mm256_storeu_pd(qBufferPtr, dVal);

        iBufferPtr += 4;
        qBufferPtr += 4;
    }

    /* Scalar tail for the num_points % 4 samples left over. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        *iBufferPtr++ = (double)*complexVectorPtr++;
        *qBufferPtr++ = (double)*complexVectorPtr++;
    }
}
#endif /* LV_HAVE_AVX */
126 
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/**
 * Deinterleaves a complex float vector into two double-precision buffers
 * using SSE2 with unaligned loads and stores.
 *
 * The input is read as consecutive float pairs; the first float of each pair
 * is widened to double and written to iBuffer, the second to qBuffer.
 *
 * \param iBuffer       Output buffer for the first component of each sample
 *                      (num_points doubles are written).
 * \param qBuffer       Output buffer for the second component of each sample
 *                      (num_points doubles are written).
 * \param complexVector Input samples, read as 2 * num_points floats.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_64f_x2_u_sse2(double *iBuffer, double *qBuffer,
                                     const lv_32fc_t *complexVector,
                                     unsigned int num_points) {
    unsigned int number = 0;

    /* Keep the const qualifier when viewing the samples as raw floats. */
    const float *complexVectorPtr = (const float *)complexVector;
    double *iBufferPtr = iBuffer;
    double *qBufferPtr = qBuffer;

    /* Each vector iteration consumes two complex samples (four floats). */
    const unsigned int halfPoints = num_points / 2;
    __m128 cplxValue, fVal;
    __m128d dVal;

    for (; number < halfPoints; number++) {

        cplxValue = _mm_loadu_ps(complexVectorPtr);
        complexVectorPtr += 4;

        /* Arrange as i1 i2 i1 i2; only the low two lanes are converted. */
        fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
        dVal = _mm_cvtps_pd(fVal);
        _mm_storeu_pd(iBufferPtr, dVal);

        /* Arrange as q1 q2 q1 q2; only the low two lanes are converted. */
        fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
        dVal = _mm_cvtps_pd(fVal);
        _mm_storeu_pd(qBufferPtr, dVal);

        iBufferPtr += 2;
        qBufferPtr += 2;
    }

    /* Scalar tail for the single sample left over when num_points is odd. */
    number = halfPoints * 2;
    for (; number < num_points; number++) {
        *iBufferPtr++ = (double)*complexVectorPtr++;
        *qBufferPtr++ = (double)*complexVectorPtr++;
    }
}
#endif /* LV_HAVE_SSE2 */
170 
#ifdef LV_HAVE_GENERIC

/**
 * Portable scalar deinterleave of a complex float vector into two
 * double-precision buffers.
 *
 * The input is read as consecutive float pairs; the first float of each pair
 * is widened to double and written to iBuffer, the second to qBuffer.
 *
 * \param iBuffer       Output buffer for the first component of each sample
 *                      (num_points doubles are written).
 * \param qBuffer       Output buffer for the second component of each sample
 *                      (num_points doubles are written).
 * \param complexVector Input samples, read as 2 * num_points floats.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_64f_x2_generic(double *iBuffer, double *qBuffer,
                                      const lv_32fc_t *complexVector,
                                      unsigned int num_points) {
    unsigned int number = 0;
    /* Keep the const qualifier when viewing the samples as raw floats. */
    const float *complexVectorPtr = (const float *)complexVector;
    double *iBufferPtr = iBuffer;
    double *qBufferPtr = qBuffer;

    for (; number < num_points; number++) {
        *iBufferPtr++ = (double)*complexVectorPtr++;
        *qBufferPtr++ = (double)*complexVectorPtr++;
    }
}
#endif /* LV_HAVE_GENERIC */
188 
189 #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_u_H */
190 #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
191 #define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
192 
193 #include <inttypes.h>
194 #include <stdio.h>
195 
#ifdef LV_HAVE_AVX
#include <immintrin.h>

/**
 * Deinterleaves a complex float vector into two double-precision buffers
 * using AVX with aligned loads and stores.
 *
 * The input is read as consecutive float pairs; the first float of each pair
 * is widened to double and written to iBuffer, the second to qBuffer.
 *
 * The vector loop uses _mm256_load_ps / _mm256_store_pd, so iBuffer, qBuffer
 * and complexVector must each be 32-byte aligned.
 *
 * \param iBuffer       Output buffer for the first component of each sample
 *                      (num_points doubles are written).
 * \param qBuffer       Output buffer for the second component of each sample
 *                      (num_points doubles are written).
 * \param complexVector Input samples, read as 2 * num_points floats.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_64f_x2_a_avx(double *iBuffer, double *qBuffer,
                                    const lv_32fc_t *complexVector,
                                    unsigned int num_points) {
    unsigned int number = 0;

    /* Keep the const qualifier when viewing the samples as raw floats. */
    const float *complexVectorPtr = (const float *)complexVector;
    double *iBufferPtr = iBuffer;
    double *qBufferPtr = qBuffer;

    /* Each vector iteration consumes four complex samples (eight floats). */
    const unsigned int quarterPoints = num_points / 4;
    __m256 cplxValue;
    __m128 complexH, complexL, fVal;
    __m256d dVal;

    for (; number < quarterPoints; number++) {

        cplxValue = _mm256_load_ps(complexVectorPtr);
        complexVectorPtr += 8;

        /* complexL = i0 q0 i1 q1, complexH = i2 q2 i3 q3 */
        complexH = _mm256_extractf128_ps(cplxValue, 1);
        complexL = _mm256_extractf128_ps(cplxValue, 0);

        /* Even lanes of both halves: i0 i1 i2 i3 */
        fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
        dVal = _mm256_cvtps_pd(fVal);
        _mm256_store_pd(iBufferPtr, dVal);

        /* Odd lanes of both halves: q0 q1 q2 q3 */
        fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
        dVal = _mm256_cvtps_pd(fVal);
        _mm256_store_pd(qBufferPtr, dVal);

        iBufferPtr += 4;
        qBufferPtr += 4;
    }

    /* Scalar tail for the num_points % 4 samples left over. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        *iBufferPtr++ = (double)*complexVectorPtr++;
        *qBufferPtr++ = (double)*complexVectorPtr++;
    }
}
#endif /* LV_HAVE_AVX */
243 
#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/**
 * Deinterleaves a complex float vector into two double-precision buffers
 * using SSE2 with aligned loads and stores.
 *
 * The input is read as consecutive float pairs; the first float of each pair
 * is widened to double and written to iBuffer, the second to qBuffer.
 *
 * The vector loop uses _mm_load_ps / _mm_store_pd, so iBuffer, qBuffer and
 * complexVector must each be 16-byte aligned.
 *
 * \param iBuffer       Output buffer for the first component of each sample
 *                      (num_points doubles are written).
 * \param qBuffer       Output buffer for the second component of each sample
 *                      (num_points doubles are written).
 * \param complexVector Input samples, read as 2 * num_points floats.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_64f_x2_a_sse2(double *iBuffer, double *qBuffer,
                                     const lv_32fc_t *complexVector,
                                     unsigned int num_points) {
    unsigned int number = 0;

    /* Keep the const qualifier when viewing the samples as raw floats. */
    const float *complexVectorPtr = (const float *)complexVector;
    double *iBufferPtr = iBuffer;
    double *qBufferPtr = qBuffer;

    /* Each vector iteration consumes two complex samples (four floats). */
    const unsigned int halfPoints = num_points / 2;
    __m128 cplxValue, fVal;
    __m128d dVal;

    for (; number < halfPoints; number++) {

        cplxValue = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        /* Arrange as i1 i2 i1 i2; only the low two lanes are converted. */
        fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
        dVal = _mm_cvtps_pd(fVal);
        _mm_store_pd(iBufferPtr, dVal);

        /* Arrange as q1 q2 q1 q2; only the low two lanes are converted. */
        fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
        dVal = _mm_cvtps_pd(fVal);
        _mm_store_pd(qBufferPtr, dVal);

        iBufferPtr += 2;
        qBufferPtr += 2;
    }

    /* Scalar tail for the single sample left over when num_points is odd. */
    number = halfPoints * 2;
    for (; number < num_points; number++) {
        *iBufferPtr++ = (double)*complexVectorPtr++;
        *qBufferPtr++ = (double)*complexVectorPtr++;
    }
}
#endif /* LV_HAVE_SSE2 */
287 
#ifdef LV_HAVE_GENERIC

/**
 * Portable scalar deinterleave of a complex float vector into two
 * double-precision buffers (entry in the aligned kernel set).
 *
 * The input is read as consecutive float pairs; the first float of each pair
 * is widened to double and written to iBuffer, the second to qBuffer. The
 * loop uses plain scalar accesses only.
 *
 * \param iBuffer       Output buffer for the first component of each sample
 *                      (num_points doubles are written).
 * \param qBuffer       Output buffer for the second component of each sample
 *                      (num_points doubles are written).
 * \param complexVector Input samples, read as 2 * num_points floats.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_64f_x2_a_generic(double *iBuffer, double *qBuffer,
                                        const lv_32fc_t *complexVector,
                                        unsigned int num_points) {
    unsigned int number = 0;
    /* Keep the const qualifier when viewing the samples as raw floats. */
    const float *complexVectorPtr = (const float *)complexVector;
    double *iBufferPtr = iBuffer;
    double *qBufferPtr = qBuffer;

    for (; number < num_points; number++) {
        *iBufferPtr++ = (double)*complexVectorPtr++;
        *qBufferPtr++ = (double)*complexVectorPtr++;
    }
}
#endif /* LV_HAVE_GENERIC */
305 
#ifdef LV_HAVE_NEONV8
#include <arm_neon.h>

/**
 * Deinterleaves a complex float vector into two double-precision buffers
 * using NEON intrinsics.
 *
 * The input is read as consecutive float pairs; the first float of each pair
 * is widened to double and written to iBuffer, the second to qBuffer.
 *
 * \param iBuffer       Output buffer for the first component of each sample
 *                      (num_points doubles are written).
 * \param qBuffer       Output buffer for the second component of each sample
 *                      (num_points doubles are written).
 * \param complexVector Input samples, read as 2 * num_points floats.
 * \param num_points    Number of complex samples to process.
 */
static inline void
volk_32fc_deinterleave_64f_x2_neon(double *iBuffer, double *qBuffer,
                                   const lv_32fc_t *complexVector,
                                   unsigned int num_points) {
    unsigned int number = 0;
    /* Each vector iteration consumes two complex samples (four floats). */
    const unsigned int half_points = num_points / 2;
    /* Keep the const qualifier when viewing the samples as raw floats. */
    const float *complexVectorPtr = (const float *)complexVector;
    double *iBufferPtr = iBuffer;
    double *qBufferPtr = qBuffer;
    float32x2x2_t complexInput;
    float64x2_t iVal, qVal;

    for (; number < half_points; number++) {
        /* Structure load: val[0] gets floats 0 and 2, val[1] gets 1 and 3. */
        complexInput = vld2_f32(complexVectorPtr);

        iVal = vcvt_f64_f32(complexInput.val[0]);
        qVal = vcvt_f64_f32(complexInput.val[1]);

        vst1q_f64(iBufferPtr, iVal);
        vst1q_f64(qBufferPtr, qVal);

        complexVectorPtr += 4;
        iBufferPtr += 2;
        qBufferPtr += 2;
    }

    /* Scalar tail for the single sample left over when num_points is odd. */
    for (number = half_points * 2; number < num_points; number++) {
        *iBufferPtr++ = (double)*complexVectorPtr++;
        *qBufferPtr++ = (double)*complexVectorPtr++;
    }
}
#endif /* LV_HAVE_NEONV8 */
341 
342 #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */
static void volk_32fc_deinterleave_64f_x2_a_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:248
static void volk_32fc_deinterleave_64f_x2_u_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:83
static void volk_32fc_deinterleave_64f_x2_a_avx(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:200
static void volk_32fc_deinterleave_64f_x2_generic(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:174
float complex lv_32fc_t
Definition: volk_complex.h:61
static void volk_32fc_deinterleave_64f_x2_u_sse2(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:131
static void volk_32fc_deinterleave_64f_x2_a_generic(double *iBuffer, double *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_64f_x2.h:291