Vector Optimized Library of Kernels  2.1
Architecture-tuned implementations of math kernels
volk_32fc_s32f_magnitude_16i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of GNU Radio
6  *
7  * GNU Radio is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3, or (at your option)
10  * any later version.
11  *
12  * GNU Radio is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with GNU Radio; see the file COPYING. If not, write to
19  * the Free Software Foundation, Inc., 51 Franklin Street,
20  * Boston, MA 02110-1301, USA.
21  */
22 
73 #ifdef LV_HAVE_GENERIC
74 #include <volk/volk_common.h>
75 
76 static inline void
77 volk_32fc_s32f_magnitude_16i_generic(int16_t* magnitudeVector, const lv_32fc_t* complexVector,
78  const float scalar, unsigned int num_points)
79 {
80  const float* complexVectorPtr = (float*)complexVector;
81  int16_t* magnitudeVectorPtr = magnitudeVector;
82  unsigned int number = 0;
83  for(number = 0; number < num_points; number++){
84  __VOLK_VOLATILE float real = *complexVectorPtr++;
85  __VOLK_VOLATILE float imag = *complexVectorPtr++;
86  real *= real;
87  imag *= imag;
88  *magnitudeVectorPtr++ = (int16_t)rintf(scalar*sqrtf(real + imag));
89  }
90 }
91 #endif /* LV_HAVE_GENERIC */
92 
93 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
94 #define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
95 
96 #include <volk/volk_common.h>
97 #include <inttypes.h>
98 #include <stdio.h>
99 #include <math.h>
100 
101 #ifdef LV_HAVE_AVX2
102 #include <immintrin.h>
103 
104 static inline void
105 volk_32fc_s32f_magnitude_16i_a_avx2(int16_t* magnitudeVector, const lv_32fc_t* complexVector,
106  const float scalar, unsigned int num_points)
107 {
108  unsigned int number = 0;
109  const unsigned int eighthPoints = num_points / 8;
110 
111  const float* complexVectorPtr = (const float*)complexVector;
112  int16_t* magnitudeVectorPtr = magnitudeVector;
113 
114  __m256 vScalar = _mm256_set1_ps(scalar);
115  __m256i idx = _mm256_set_epi32(0,0,0,0,5,1,4,0);
116  __m256 cplxValue1, cplxValue2, result;
117  __m256i resultInt;
118  __m128i resultShort;
119 
120  for(;number < eighthPoints; number++){
121  cplxValue1 = _mm256_load_ps(complexVectorPtr);
122  complexVectorPtr += 8;
123 
124  cplxValue2 = _mm256_load_ps(complexVectorPtr);
125  complexVectorPtr += 8;
126 
127  cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
128  cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
129 
130  result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
131 
132  result = _mm256_sqrt_ps(result);
133 
134  result = _mm256_mul_ps(result, vScalar);
135 
136  resultInt = _mm256_cvtps_epi32(result);
137  resultInt = _mm256_packs_epi32(resultInt, resultInt);
138  resultInt = _mm256_permutevar8x32_epi32(resultInt, idx); //permute to compensate for shuffling in hadd and packs
139  resultShort = _mm256_extracti128_si256(resultInt,0);
140  _mm_store_si128((__m128i*)magnitudeVectorPtr,resultShort);
141  magnitudeVectorPtr += 8;
142  }
143 
144  number = eighthPoints * 8;
145  volk_32fc_s32f_magnitude_16i_generic(magnitudeVector+number, complexVector+number, scalar, num_points-number);
146 }
147 #endif /* LV_HAVE_AVX2 */
148 
149 #ifdef LV_HAVE_SSE3
150 #include <pmmintrin.h>
151 
152 static inline void
153 volk_32fc_s32f_magnitude_16i_a_sse3(int16_t* magnitudeVector, const lv_32fc_t* complexVector,
154  const float scalar, unsigned int num_points)
155 {
156  unsigned int number = 0;
157  const unsigned int quarterPoints = num_points / 4;
158 
159  const float* complexVectorPtr = (const float*)complexVector;
160  int16_t* magnitudeVectorPtr = magnitudeVector;
161 
162  __m128 vScalar = _mm_set_ps1(scalar);
163 
164  __m128 cplxValue1, cplxValue2, result;
165 
166  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
167 
168  for(;number < quarterPoints; number++){
169  cplxValue1 = _mm_load_ps(complexVectorPtr);
170  complexVectorPtr += 4;
171 
172  cplxValue2 = _mm_load_ps(complexVectorPtr);
173  complexVectorPtr += 4;
174 
175  cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
176  cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
177 
178  result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
179 
180  result = _mm_sqrt_ps(result);
181 
182  result = _mm_mul_ps(result, vScalar);
183 
184  _mm_store_ps(floatBuffer, result);
185  *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
186  *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
187  *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
188  *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
189  }
190 
191  number = quarterPoints * 4;
192  volk_32fc_s32f_magnitude_16i_generic(magnitudeVector+number, complexVector+number, scalar, num_points-number);
193 }
194 #endif /* LV_HAVE_SSE3 */
195 
196 
197 #ifdef LV_HAVE_SSE
198 #include <xmmintrin.h>
199 
200 static inline void
201 volk_32fc_s32f_magnitude_16i_a_sse(int16_t* magnitudeVector, const lv_32fc_t* complexVector,
202  const float scalar, unsigned int num_points)
203 {
204  unsigned int number = 0;
205  const unsigned int quarterPoints = num_points / 4;
206 
207  const float* complexVectorPtr = (const float*)complexVector;
208  int16_t* magnitudeVectorPtr = magnitudeVector;
209 
210  __m128 vScalar = _mm_set_ps1(scalar);
211 
212  __m128 cplxValue1, cplxValue2, result;
213  __m128 iValue, qValue;
214 
215  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
216 
217  for(;number < quarterPoints; number++){
218  cplxValue1 = _mm_load_ps(complexVectorPtr);
219  complexVectorPtr += 4;
220 
221  cplxValue2 = _mm_load_ps(complexVectorPtr);
222  complexVectorPtr += 4;
223 
224  // Arrange in i1i2i3i4 format
225  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
226  // Arrange in q1q2q3q4 format
227  qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
228 
229  __VOLK_VOLATILE __m128 iValue2 = _mm_mul_ps(iValue, iValue); // Square the I values
230  __VOLK_VOLATILE __m128 qValue2 = _mm_mul_ps(qValue, qValue); // Square the Q Values
231 
232  result = _mm_add_ps(iValue2, qValue2); // Add the I2 and Q2 values
233 
234  result = _mm_sqrt_ps(result);
235 
236  result = _mm_mul_ps(result, vScalar);
237 
238  _mm_store_ps(floatBuffer, result);
239  *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
240  *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
241  *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
242  *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
243  }
244 
245  number = quarterPoints * 4;
246  volk_32fc_s32f_magnitude_16i_generic(magnitudeVector+number, complexVector+number, scalar, num_points-number);
247 }
248 #endif /* LV_HAVE_SSE */
249 
250 
251 #endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a_H */
252 
253 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
254 #define INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
255 
256 #include <volk/volk_common.h>
257 #include <inttypes.h>
258 #include <stdio.h>
259 #include <math.h>
260 
261 #ifdef LV_HAVE_AVX2
262 #include <immintrin.h>
263 
264 static inline void
265 volk_32fc_s32f_magnitude_16i_u_avx2(int16_t* magnitudeVector, const lv_32fc_t* complexVector,
266  const float scalar, unsigned int num_points)
267 {
268  unsigned int number = 0;
269  const unsigned int eighthPoints = num_points / 8;
270 
271  const float* complexVectorPtr = (const float*)complexVector;
272  int16_t* magnitudeVectorPtr = magnitudeVector;
273 
274  __m256 vScalar = _mm256_set1_ps(scalar);
275  __m256i idx = _mm256_set_epi32(0,0,0,0,5,1,4,0);
276  __m256 cplxValue1, cplxValue2, result;
277  __m256i resultInt;
278  __m128i resultShort;
279 
280  for(;number < eighthPoints; number++){
281  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
282  complexVectorPtr += 8;
283 
284  cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
285  complexVectorPtr += 8;
286 
287  cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
288  cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
289 
290  result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
291 
292  result = _mm256_sqrt_ps(result);
293 
294  result = _mm256_mul_ps(result, vScalar);
295 
296  resultInt = _mm256_cvtps_epi32(result);
297  resultInt = _mm256_packs_epi32(resultInt, resultInt);
298  resultInt = _mm256_permutevar8x32_epi32(resultInt, idx); //permute to compensate for shuffling in hadd and packs
299  resultShort = _mm256_extracti128_si256(resultInt,0);
300  _mm_storeu_si128((__m128i*)magnitudeVectorPtr,resultShort);
301  magnitudeVectorPtr += 8;
302  }
303 
304  number = eighthPoints * 8;
305  volk_32fc_s32f_magnitude_16i_generic(magnitudeVector+number, complexVector+number, scalar, num_points-number);
306 }
307 #endif /* LV_HAVE_AVX2 */
308 
309 #endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_u_H */
#define __VOLK_VOLATILE
Definition: volk_common.h:54
static float rintf(float x)
Definition: config.h:31
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:46
static void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:153
float complex lv_32fc_t
Definition: volk_complex.h:61
static void volk_32fc_s32f_magnitude_16i_generic(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:77
static void volk_32fc_s32f_magnitude_16i_a_sse(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:201