71 #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H
72 #define INCLUDED_volk_32fc_deinterleave_real_64f_a_H
78 #include <immintrin.h>
80 static inline void volk_32fc_deinterleave_real_64f_a_avx2(
81 double *iBuffer,
const lv_32fc_t *complexVector,
unsigned int num_points) {
82 unsigned int number = 0;
84 const float *complexVectorPtr = (
float *)complexVector;
85 double *iBufferPtr = iBuffer;
87 const unsigned int quarterPoints = num_points / 4;
91 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
92 for (; number < quarterPoints; number++) {
94 cplxValue = _mm256_load_ps(complexVectorPtr);
95 complexVectorPtr += 8;
98 cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
99 fVal = _mm256_extractf128_ps(cplxValue, 0);
100 dVal = _mm256_cvtps_pd(fVal);
101 _mm256_store_pd(iBufferPtr, dVal);
106 number = quarterPoints * 4;
107 for (; number < num_points; number++) {
108 *iBufferPtr++ = (double)*complexVectorPtr++;
115 #include <emmintrin.h>
/*
 * Fragment of a 128-bit SIMD kernel that converts selected float lanes of a
 * complex input buffer to doubles and stores them in iBuffer.
 *
 * NOTE(review): the line carrying the return type and function name is not
 * visible in this excerpt; neither are a declaration of dVal, an increment of
 * iBufferPtr inside the vector loop, or the closing braces. Confirm against
 * the complete file before changing any code here.
 *
 * iBuffer       - output buffer of doubles (written via _mm_store_pd)
 * complexVector - input samples (read via _mm_load_ps)
 * num_points    - number of input samples to process
 */
118 double *iBuffer,
const lv_32fc_t *complexVector,
unsigned int num_points) {
119 unsigned int number = 0;
/* Reinterpret the complex input as a flat array of floats. */
121 const float *complexVectorPtr = (
float *)complexVector;
122 double *iBufferPtr = iBuffer;
/* Each vector iteration consumes four floats, i.e. two samples. */
124 const unsigned int halfPoints = num_points / 2;
125 __m128 cplxValue, fVal;
127 for (; number < halfPoints; number++) {
/* Aligned-load intrinsic; presumably requires a 16-byte aligned input -
 * TODO confirm against the caller's alignment guarantees. */
129 cplxValue = _mm_load_ps(complexVectorPtr);
130 complexVectorPtr += 4;
/* Selector (2, 0, 2, 0) puts source lanes 0 and 2 into the two low result
 * lanes, which are the ones _mm_cvtps_pd widens to double. */
133 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
134 dVal = _mm_cvtps_pd(fVal);
135 _mm_store_pd(iBufferPtr, dVal);
/* Scalar tail: handle the sample left over when num_points is odd. */
140 number = halfPoints * 2;
141 for (; number < num_points; number++) {
/* NOTE(review): as visible, the input pointer advances by one float per
 * iteration; with two floats per sample a second increment would be needed
 * to skip the other component - verify the line is not simply missing. */
142 *iBufferPtr++ = (double)*complexVectorPtr++;
148 #ifdef LV_HAVE_GENERIC
/*
 * Fragment of the portable scalar kernel: walks the input as floats and
 * writes each visited float to iBuffer as a double.
 *
 * NOTE(review): the line carrying the return type and function name is not
 * visible in this excerpt, and the closing braces are not visible either.
 * Confirm against the complete file before changing any code here.
 *
 * iBuffer       - output buffer of doubles
 * complexVector - input samples
 * num_points    - number of input samples to process
 */
151 double *iBuffer,
const lv_32fc_t *complexVector,
unsigned int num_points) {
152 unsigned int number = 0;
/* Reinterpret the complex input as a flat array of floats. */
153 const float *complexVectorPtr = (
float *)complexVector;
154 double *iBufferPtr = iBuffer;
155 for (number = 0; number < num_points; number++) {
/* NOTE(review): as visible, the input pointer advances by one float per
 * iteration; the SIMD variants consume two floats per sample, so a second
 * increment is presumably required here - verify it is not simply missing. */
156 *iBufferPtr++ = (double)*complexVectorPtr++;
162 #ifdef LV_HAVE_NEONV8
163 #include <arm_neon.h>
/*!
 * \brief Copies the real part of every complex float sample into a
 *        double-precision buffer (ARMv8 NEON).
 *
 * The input is treated as a flat float array with two floats per sample; the
 * first float of each pair is widened to double and written to the output.
 *
 * \param iBuffer       Output buffer receiving num_points doubles.
 * \param complexVector Input buffer holding num_points complex samples.
 * \param num_points    Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_real_64f_neon(
    double *iBuffer,
    const lv_32fc_t *complexVector,
    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarter_points = num_points / 4;
    const float *complexVectorPtr = (const float *)complexVector;
    double *iBufferPtr = iBuffer;
    float32x2x4_t complexInput;
    float64x2_t iVal1;
    float64x2_t iVal2;
    float64x2x2_t iVal;

    /* Main loop: four complex samples (eight floats) per iteration. */
    for (number = 0; number < quarter_points; number++) {
        /* 4-way de-interleaving load of floats f0..f7:
         * val[0] = {f0, f4}, val[1] = {f1, f5},
         * val[2] = {f2, f6}, val[3] = {f3, f7}.
         * The first floats of the four samples are f0, f2, f4, f6. */
        complexInput = vld4_f32(complexVectorPtr);

        /* Widen to double: iVal1 = {f0, f4}, iVal2 = {f2, f6}. */
        iVal1 = vcvt_f64_f32(complexInput.val[0]);
        iVal2 = vcvt_f64_f32(complexInput.val[2]);
        iVal.val[0] = iVal1;
        iVal.val[1] = iVal2;

        /* The 2-way interleaving store writes f0, f2, f4, f6, restoring
         * sample order. */
        vst2q_f64(iBufferPtr, iVal);

        iBufferPtr += 4;
        complexVectorPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 4) samples. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *iBufferPtr++ = (double)*complexVectorPtr++;
        /* Skip the second float of the pair (imaginary part). */
        complexVectorPtr++;
    }
}
203 #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_u_H
204 #define INCLUDED_volk_32fc_deinterleave_real_64f_u_H
206 #include <inttypes.h>
210 #include <immintrin.h>
/*
 * AVX2 kernel using the unaligned load/store intrinsics: gathers float lanes
 * 0, 2, 4 and 6 of each 8-float group from the input, widens them to double
 * and writes them to iBuffer.
 *
 * NOTE(review): this definition runs to the end of the visible excerpt.
 * Declarations of cplxValue, fVal and dVal, an increment of iBufferPtr inside
 * the vector loop, and the closing braces are not visible here - confirm
 * against the complete file before changing any code.
 *
 * iBuffer       - output buffer of doubles (written via _mm256_storeu_pd)
 * complexVector - input samples (read via _mm256_loadu_ps)
 * num_points    - number of input samples to process
 */
212 static inline void volk_32fc_deinterleave_real_64f_u_avx2(
213 double *iBuffer,
const lv_32fc_t *complexVector,
unsigned int num_points) {
214 unsigned int number = 0;
/* Reinterpret the complex input as a flat array of floats. */
216 const float *complexVectorPtr = (
float *)complexVector;
217 double *iBufferPtr = iBuffer;
/* Each vector iteration consumes eight floats, i.e. four samples. */
219 const unsigned int quarterPoints = num_points / 4;
/* Permutation indices, listed from the highest lane to the lowest: result
 * lanes 0..3 take source lanes 0, 2, 4, 6; the upper four are don't-care. */
223 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
224 for (; number < quarterPoints; number++) {
226 cplxValue = _mm256_loadu_ps(complexVectorPtr);
227 complexVectorPtr += 8;
/* Pack the selected lanes into the low 128 bits, then widen those four
 * floats to four doubles. */
230 cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
231 fVal = _mm256_extractf128_ps(cplxValue, 0);
232 dVal = _mm256_cvtps_pd(fVal);
233 _mm256_storeu_pd(iBufferPtr, dVal);
/* Scalar tail for the remaining (num_points % 4) samples. */
238 number = quarterPoints * 4;
239 for (; number < num_points; number++) {
/* NOTE(review): as visible, the input pointer advances by one float per
 * iteration; with two floats per sample a second increment would be needed
 * to skip the other component - verify the line is not simply missing. */
240 *iBufferPtr++ = (double)*complexVectorPtr++;