// NOTE(review): this is a gapped excerpt -- the enclosing function signature,
// the declarations of aVal/cVal and the closing braces are not visible here.
// Visible logic: element-wise square root of aVector written to cVector,
// handled in groups of eight floats with AVX, then a scalar tail.
97 unsigned int number = 0;
// Count of complete 8-float groups in the input.
98 const unsigned int eighthPoints = num_points / 8;
100 float* cPtr = cVector;
101 const float* aPtr = aVector;
// Vector loop: one 8-float group per iteration.
104 for (; number < eighthPoints; number++) {
// Aligned load: _mm256_load_ps requires aPtr to be 32-byte aligned.
105 aVal = _mm256_load_ps(aPtr);
107 cVal = _mm256_sqrt_ps(aVal);
// Aligned store: cPtr must be 32-byte aligned as well.
109 _mm256_store_ps(cPtr, cVal);
// NOTE(review): the advance of aPtr/cPtr for the vector loop is presumably on
// lines not shown in this excerpt -- confirm.
// Scalar tail: restart the counter at the first element not covered by a
// complete 8-float group and finish with sqrtf.
115 number = eighthPoints * 8;
116 for (; number < num_points; number++) {
117 *cPtr++ = sqrtf(*aPtr++);
// NOTE(review): this is a gapped excerpt -- the enclosing function signature
// and the closing braces are not visible here.
// Visible logic: element-wise square root of aVector written to cVector,
// handled in groups of four floats with NEON, then a scalar tail.
130 float* cPtr = cVector;
131 const float* aPtr = aVector;
132 unsigned int number = 0;
// Count of complete 4-float groups in the input.
133 unsigned int quarter_points = num_points / 4;
134 float32x4_t in_vec, out_vec;
// Vector loop: one 4-float group per iteration.
136 for (number = 0; number < quarter_points; number++) {
137 in_vec = vld1q_f32(aPtr);
// sqrt(x) is formed as 1 / (1 / sqrt(x)): vrsqrteq_f32 gives a per-lane
// reciprocal-square-root estimate and vrecpeq_f32 a per-lane reciprocal
// estimate.
// NOTE(review): both intrinsics are low-precision estimates and no
// Newton-Raphson refinement (vrsqrtsq_f32 / vrecpsq_f32) is visible here, so
// the vector path is less accurate than the sqrtf tail below -- confirm this
// is acceptable for callers.
139 out_vec = vrecpeq_f32(vrsqrteq_f32(in_vec));
140 vst1q_f32(cPtr, out_vec);
// NOTE(review): the advance of aPtr/cPtr for the vector loop is presumably on
// lines not shown in this excerpt -- confirm.
// Scalar tail: finish the elements left over after the last complete group
// with sqrtf.
145 for (number = quarter_points * 4; number < num_points; number++) {
146 *cPtr++ = sqrtf(*aPtr++);
// NOTE(review): this is a gapped excerpt -- the enclosing function signature,
// the declarations of aVal/cVal and the closing braces are not visible here.
// Visible logic: element-wise square root of aVector written to cVector,
// handled in groups of eight floats with AVX, then a scalar tail.
183 unsigned int number = 0;
// Count of complete 8-float groups in the input.
184 const unsigned int eighthPoints = num_points / 8;
186 float* cPtr = cVector;
187 const float* aPtr = aVector;
// Vector loop: one 8-float group per iteration.
190 for (; number < eighthPoints; number++) {
// Unaligned load: _mm256_loadu_ps places no alignment requirement on aPtr.
191 aVal = _mm256_loadu_ps(aPtr);
193 cVal = _mm256_sqrt_ps(aVal);
// Unaligned store: cPtr may have any alignment.
195 _mm256_storeu_ps(cPtr, cVal);
// NOTE(review): the advance of aPtr/cPtr for the vector loop is presumably on
// lines not shown in this excerpt -- confirm.
// Scalar tail: restart the counter at the first element not covered by a
// complete 8-float group and finish with sqrtf.
201 number = eighthPoints * 8;
202 for (; number < num_points; number++) {
203 *cPtr++ = sqrtf(*aPtr++);