#ifdef LV_HAVE_GENERIC

/*!
 * \brief Portable C kernel: scaled magnitude of complex floats, rounded to int16_t.
 *
 * Each output element is (int16_t)rintf(scalar * sqrtf(re * re + im * im)),
 * where re and im are the two consecutive floats that make up one input sample.
 *
 * \param magnitudeVector destination; num_points int16_t values are written
 * \param complexVector   source; num_points complex samples are read
 * \param scalar          multiplier applied to each magnitude before rounding
 * \param num_points      number of complex samples to convert
 *
 * NOTE(review): the float -> int16_t cast does not clamp; presumably callers
 * choose scalar so that every result fits in int16_t -- confirm.
 */
static inline void
volk_32fc_s32f_magnitude_16i_generic(int16_t* magnitudeVector,
                                     const lv_32fc_t* complexVector,
                                     const float scalar,
                                     unsigned int num_points)
{
    /* View the samples as interleaved floats without discarding const. */
    const float* complexVectorPtr = (const float*)complexVector;
    int16_t* magnitudeVectorPtr = magnitudeVector;
    unsigned int number = 0;

    for (number = 0; number < num_points; number++) {
        /* NOTE(review): this file references __VOLK_VOLATILE (volk_common.h);
         * presumably it qualifies these temporaries -- confirm upstream. */
        __VOLK_VOLATILE float real = *complexVectorPtr++;
        __VOLK_VOLATILE float imag = *complexVectorPtr++;
        real *= real;
        imag *= imag;
        *magnitudeVectorPtr++ = (int16_t)rintf(scalar * sqrtf(real + imag));
    }
}

#endif /* LV_HAVE_GENERIC */
#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
#define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H

#include <inttypes.h>
#include <math.h>

/*
 * Aligned kernels.  All of them produce, for every complex input sample,
 * scalar * sqrt(re^2 + im^2) rounded to a 16-bit integer.
 *
 * lv_32fc_t, __VOLK_ATTR_ALIGNED and __VOLK_VOLATILE are not defined here;
 * they are expected to come from the project's common headers, which must be
 * included before this section.
 */

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief AVX2 kernel (aligned): scaled magnitude of complex floats as int16_t.
 *
 * Processes eight complex samples per iteration with aligned loads and an
 * aligned 128-bit store, so complexVector must be 32-byte aligned and
 * magnitudeVector 16-byte aligned.  Samples left over when num_points is not
 * a multiple of eight are handled by a scalar loop.
 *
 * \param magnitudeVector destination; num_points int16_t values are written
 * \param complexVector   source; num_points complex samples are read
 * \param scalar          multiplier applied to each magnitude before rounding
 * \param num_points      number of complex samples to convert
 */
static inline void
volk_32fc_s32f_magnitude_16i_a_avx2(int16_t* magnitudeVector,
                                    const lv_32fc_t* complexVector,
                                    const float scalar,
                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    const float* complexVectorPtr = (const float*)complexVector;
    int16_t* magnitudeVectorPtr = magnitudeVector;

    __m256 vScalar = _mm256_set1_ps(scalar);
    /* Selects 32-bit elements 0, 4, 1, 5: undoes the per-lane interleaving
     * left behind by hadd + packs (upper four indices are unused). */
    __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
    __m256 cplxValue1, cplxValue2, result;
    __m256i resultInt;
    __m128i resultShort;

    for (; number < eighthPoints; number++) {
        cplxValue1 = _mm256_load_ps(complexVectorPtr);
        complexVectorPtr += 8;

        cplxValue2 = _mm256_load_ps(complexVectorPtr);
        complexVectorPtr += 8;

        /* Square every real and imaginary part. */
        cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
        cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);

        /* Adjacent-pair sums give re^2 + im^2; the result is ordered
         * m0 m1 m4 m5 | m2 m3 m6 m7 across the two 128-bit lanes. */
        result = _mm256_hadd_ps(cplxValue1, cplxValue2);

        result = _mm256_sqrt_ps(result);

        result = _mm256_mul_ps(result, vScalar);

        /* Round to int32, narrow to int16 with signed saturation, then
         * restore sample order and keep the low 128 bits (8 x int16). */
        resultInt = _mm256_cvtps_epi32(result);
        resultInt = _mm256_packs_epi32(resultInt, resultInt);
        resultInt = _mm256_permutevar8x32_epi32(resultInt, idx);
        resultShort = _mm256_extracti128_si256(resultInt, 0);
        _mm_store_si128((__m128i*)magnitudeVectorPtr, resultShort);
        magnitudeVectorPtr += 8;
    }

    /* Scalar tail; both pointers already sit just past the vectorized part.
     * Done inline so this kernel does not depend on the generic kernel being
     * compiled in. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        const float real = *complexVectorPtr++;
        const float imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ =
            (int16_t)rintf(scalar * sqrtf((real * real) + (imag * imag)));
    }
}
#endif /* LV_HAVE_AVX2 */


#ifdef LV_HAVE_SSE3
#include <pmmintrin.h>

/*!
 * \brief SSE3 kernel (aligned): scaled magnitude of complex floats as int16_t.
 *
 * Processes four complex samples per iteration with aligned loads, so
 * complexVector must be 16-byte aligned.  Results are rounded one at a time
 * with rintf().  Samples left over when num_points is not a multiple of four
 * are handled by a scalar loop.
 *
 * \param magnitudeVector destination; num_points int16_t values are written
 * \param complexVector   source; num_points complex samples are read
 * \param scalar          multiplier applied to each magnitude before rounding
 * \param num_points      number of complex samples to convert
 */
static inline void
volk_32fc_s32f_magnitude_16i_a_sse3(int16_t* magnitudeVector,
                                    const lv_32fc_t* complexVector,
                                    const float scalar,
                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float* complexVectorPtr = (const float*)complexVector;
    int16_t* magnitudeVectorPtr = magnitudeVector;

    __m128 vScalar = _mm_set_ps1(scalar);

    __m128 cplxValue1, cplxValue2, result;

    /* _mm_store_ps needs a 16-byte aligned destination. */
    __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];

    for (; number < quarterPoints; number++) {
        cplxValue1 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        cplxValue2 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        /* Square every real and imaginary part. */
        cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1);
        cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2);

        /* Adjacent-pair sums: re^2 + im^2 for samples 0..3, in order. */
        result = _mm_hadd_ps(cplxValue1, cplxValue2);

        result = _mm_sqrt_ps(result);

        result = _mm_mul_ps(result, vScalar);

        _mm_store_ps(floatBuffer, result);
        *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
        *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
        *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
        *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
    }

    /* Scalar tail; both pointers already sit just past the vectorized part. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        const float real = *complexVectorPtr++;
        const float imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ =
            (int16_t)rintf(scalar * sqrtf((real * real) + (imag * imag)));
    }
}
#endif /* LV_HAVE_SSE3 */


#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

/*!
 * \brief SSE kernel (aligned): scaled magnitude of complex floats as int16_t.
 *
 * Processes four complex samples per iteration with aligned loads, so
 * complexVector must be 16-byte aligned.  Real and imaginary parts are
 * separated with shuffles, squared and summed.  Results are rounded one at a
 * time with rintf().  Samples left over when num_points is not a multiple of
 * four are handled by a scalar loop.
 *
 * \param magnitudeVector destination; num_points int16_t values are written
 * \param complexVector   source; num_points complex samples are read
 * \param scalar          multiplier applied to each magnitude before rounding
 * \param num_points      number of complex samples to convert
 */
static inline void
volk_32fc_s32f_magnitude_16i_a_sse(int16_t* magnitudeVector,
                                   const lv_32fc_t* complexVector,
                                   const float scalar,
                                   unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    const float* complexVectorPtr = (const float*)complexVector;
    int16_t* magnitudeVectorPtr = magnitudeVector;

    __m128 vScalar = _mm_set_ps1(scalar);

    __m128 cplxValue1, cplxValue2, result;
    __m128 iValue, qValue;

    /* _mm_store_ps needs a 16-byte aligned destination. */
    __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];

    for (; number < quarterPoints; number++) {
        cplxValue1 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        cplxValue2 = _mm_load_ps(complexVectorPtr);
        complexVectorPtr += 4;

        /* Even float positions (real parts) of the four samples. */
        iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
        /* Odd float positions (imaginary parts) of the four samples. */
        qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));

        /* NOTE(review): this file references __VOLK_VOLATILE (volk_common.h);
         * presumably it qualifies these squared temporaries -- confirm. */
        __VOLK_VOLATILE __m128 iValue2 = _mm_mul_ps(iValue, iValue);
        __VOLK_VOLATILE __m128 qValue2 = _mm_mul_ps(qValue, qValue);

        result = _mm_add_ps(iValue2, qValue2);

        result = _mm_sqrt_ps(result);

        result = _mm_mul_ps(result, vScalar);

        _mm_store_ps(floatBuffer, result);
        *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
        *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
        *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
        *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
    }

    /* Scalar tail; both pointers already sit just past the vectorized part. */
    number = quarterPoints * 4;
    for (; number < num_points; number++) {
        const float real = *complexVectorPtr++;
        const float imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ =
            (int16_t)rintf(scalar * sqrtf((real * real) + (imag * imag)));
    }
}
#endif /* LV_HAVE_SSE */

#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a_H */
#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
#define INCLUDED_volk_32fc_s32f_magnitude_16i_u_H

#include <inttypes.h>
#include <math.h>

/*
 * Unaligned kernels.  lv_32fc_t is not defined here; it is expected to come
 * from the project's common headers, which must be included before this
 * section.
 */

#ifdef LV_HAVE_AVX2
#include <immintrin.h>

/*!
 * \brief AVX2 kernel (unaligned): scaled magnitude of complex floats as int16_t.
 *
 * For every complex input sample the output is scalar * sqrt(re^2 + im^2)
 * rounded to a 16-bit integer.  Eight samples are processed per iteration
 * using unaligned loads and stores, so neither buffer needs special
 * alignment.  Samples left over when num_points is not a multiple of eight
 * are handled by a scalar loop.
 *
 * \param magnitudeVector destination; num_points int16_t values are written
 * \param complexVector   source; num_points complex samples are read
 * \param scalar          multiplier applied to each magnitude before rounding
 * \param num_points      number of complex samples to convert
 */
static inline void
volk_32fc_s32f_magnitude_16i_u_avx2(int16_t* magnitudeVector,
                                    const lv_32fc_t* complexVector,
                                    const float scalar,
                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    const float* complexVectorPtr = (const float*)complexVector;
    int16_t* magnitudeVectorPtr = magnitudeVector;

    __m256 vScalar = _mm256_set1_ps(scalar);
    /* Selects 32-bit elements 0, 4, 1, 5: undoes the per-lane interleaving
     * left behind by hadd + packs (upper four indices are unused). */
    __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
    __m256 cplxValue1, cplxValue2, result;
    __m256i resultInt;
    __m128i resultShort;

    for (; number < eighthPoints; number++) {
        cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
        complexVectorPtr += 8;

        cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
        complexVectorPtr += 8;

        /* Square every real and imaginary part. */
        cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
        cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);

        /* Adjacent-pair sums give re^2 + im^2; the result is ordered
         * m0 m1 m4 m5 | m2 m3 m6 m7 across the two 128-bit lanes. */
        result = _mm256_hadd_ps(cplxValue1, cplxValue2);

        result = _mm256_sqrt_ps(result);

        result = _mm256_mul_ps(result, vScalar);

        /* Round to int32, narrow to int16 with signed saturation, then
         * restore sample order and keep the low 128 bits (8 x int16). */
        resultInt = _mm256_cvtps_epi32(result);
        resultInt = _mm256_packs_epi32(resultInt, resultInt);
        resultInt = _mm256_permutevar8x32_epi32(resultInt, idx);
        resultShort = _mm256_extracti128_si256(resultInt, 0);
        _mm_storeu_si128((__m128i*)magnitudeVectorPtr, resultShort);
        magnitudeVectorPtr += 8;
    }

    /* Scalar tail; both pointers already sit just past the vectorized part.
     * Done inline so this kernel does not depend on the generic kernel being
     * compiled in. */
    number = eighthPoints * 8;
    for (; number < num_points; number++) {
        const float real = *complexVectorPtr++;
        const float imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ =
            (int16_t)rintf(scalar * sqrtf((real * real) + (imag * imag)));
    }
}
#endif /* LV_HAVE_AVX2 */

#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_u_H */
#define __VOLK_VOLATILE
Definition: volk_common.h:54
static float rintf(float x)
Definition: config.h:31
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:46
static void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:153
float complex lv_32fc_t
Definition: volk_complex.h:61
static void volk_32fc_s32f_magnitude_16i_generic(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:77
static void volk_32fc_s32f_magnitude_16i_a_sse(int16_t *magnitudeVector, const lv_32fc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_32fc_s32f_magnitude_16i.h:201