/external/libopus/celt/x86/ |
pitch_sse.h | 43 xsum1 = _mm_loadu_ps(sum); 48 __m128 x0 = _mm_loadu_ps(x+j); 49 __m128 yj = _mm_loadu_ps(y+j); 50 __m128 y3 = _mm_loadu_ps(y+j+3); 61 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); 64 xsum2 = _mm_add_ps(xsum2,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); 67 xsum1 = _mm_add_ps(xsum1,_mm_mul_ps(_mm_load1_ps(x+j),_mm_loadu_ps(y+j))); 84 __m128 xi = _mm_loadu_ps(x+i); 85 __m128 y1i = _mm_loadu_ps(y01+i); 86 __m128 y2i = _mm_loadu_ps(y02+i) [all...] |
/external/speex/libspeex/ |
resample_sse.h | 47 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i))); 48 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4))); 61 __m128 f = _mm_loadu_ps(frac); 64 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample))); 65 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample))); 86 t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)); 90 t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)) [all...] |
ltp_sse.h | 45 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+0), _mm_loadu_ps(b+0))); 46 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+4), _mm_loadu_ps(b+4))); 68 x[i] = _mm_loadu_ps(_x+(i<<2)); 72 y[i] = _mm_loadu_ps(_y+(i<<2)+offset);
|
filters_sse.h | 46 mem[i] = _mm_loadu_ps(_mem+4*i); 47 num[i] = _mm_loadu_ps(_num+4*i); 48 den[i] = _mm_loadu_ps(_den+4*i); 99 mem[i] = _mm_loadu_ps(_mem+4*i); 100 num[i] = _mm_loadu_ps(_num+4*i); 101 den[i] = _mm_loadu_ps(_den+4*i); 153 mem[i] = _mm_loadu_ps(_mem+4*i); 154 den[i] = _mm_loadu_ps(_den+4*i); 202 mem[i] = _mm_loadu_ps(_mem+4*i); 203 den[i] = _mm_loadu_ps(_den+4*i) [all...] |
/external/webrtc/webrtc/modules/audio_processing/aec/ |
aec_core_sse2.c | 51 const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]); 52 const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]); 53 const __m128 h_fft_buf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]); 54 const __m128 h_fft_buf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]); 55 const __m128 y_fft_re = _mm_loadu_ps(&y_fft[0][j]); 56 const __m128 y_fft_im = _mm_loadu_ps(&y_fft[1][j]); 97 const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]); 98 const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); 99 const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); 170 const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]) [all...] |
aec_rdft_sse2.c | 23 __m128 a00v = _mm_loadu_ps(&a[j + 0]); 24 __m128 a04v = _mm_loadu_ps(&a[j + 4]); 25 __m128 a08v = _mm_loadu_ps(&a[j + 8]); 26 __m128 a12v = _mm_loadu_ps(&a[j + 12]); 253 const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4, 254 const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, 260 const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, 261 const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, 262 const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123, 263 const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127 [all...] |
/external/clang/test/Sema/ |
uninit-variables-vectors.c | 6 __m128 _mm_loadu_ps(const float *p); 14 X = _mm_loadu_ps(&input[0]);
|
/external/opencv3/modules/imgproc/src/ |
corner.cpp | 75 __m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x 76 __m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x 77 __m128 t2 = _mm_loadu_ps(cov + j*3 + 6); // a2 b2 c2 x 78 __m128 t3 = _mm_loadu_ps(cov + j*3 + 9); // a3 b3 c3 x 145 __m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x 146 __m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x 147 __m128 t2 = _mm_loadu_ps(cov + j*3 + 6); // a2 b2 c2 x 148 __m128 t3 = _mm_loadu_ps(cov + j*3 + 9); // a3 b3 c3 x 330 __m128 v_dx_0 = _mm_loadu_ps(dxdata + j); 331 __m128 v_dx_1 = _mm_loadu_ps(dxdata + j + 4) [all...] |
imgwarp.cpp | 593 x0 = _mm_loadu_ps(S0 + x); 594 x1 = _mm_loadu_ps(S0 + x + 4); 595 y0 = _mm_loadu_ps(S1 + x); 596 y1 = _mm_loadu_ps(S1 + x + 4); 604 x0 = _mm_loadu_ps(S0 + x + 8); 605 x1 = _mm_loadu_ps(S0 + x + 12); 606 y0 = _mm_loadu_ps(S1 + x + 8); 607 y1 = _mm_loadu_ps(S1 + x + 12); 623 x0 = _mm_loadu_ps(S0 + x); 624 y0 = _mm_loadu_ps(S1 + x) [all...] |
filter.cpp | [all...] |
thresh.cpp | 730 v0 = _mm_loadu_ps( src + j ); 731 v1 = _mm_loadu_ps( src + j + 4 ); 768 v0 = _mm_loadu_ps( src + j ); 769 v1 = _mm_loadu_ps( src + j + 4 ); 806 v0 = _mm_loadu_ps( src + j ); 807 v1 = _mm_loadu_ps( src + j + 4 ); 837 v0 = _mm_loadu_ps( src + j ); 838 v1 = _mm_loadu_ps( src + j + 4 ); 876 v0 = _mm_loadu_ps( src + j ); 877 v1 = _mm_loadu_ps( src + j + 4 ) [all...] |
morph.cpp | 155 __m128 s = _mm_loadu_ps((const float*)src + i); 158 __m128 x = _mm_loadu_ps((const float*)src + i + k); 469 __m128 s0 = _mm_loadu_ps(sptr); 470 __m128 s1 = _mm_loadu_ps(sptr + 4); 471 __m128 s2 = _mm_loadu_ps(sptr + 8); 472 __m128 s3 = _mm_loadu_ps(sptr + 12); 478 x0 = _mm_loadu_ps(sptr); 479 x1 = _mm_loadu_ps(sptr + 4); 480 x2 = _mm_loadu_ps(sptr + 8); 481 x3 = _mm_loadu_ps(sptr + 12) [all...] |
color.cpp | [all...] |
pyramids.cpp | 499 __m128 v_r0 = _mm_loadu_ps(row0 + x); 500 __m128 v_r1 = _mm_loadu_ps(row1 + x); 501 __m128 v_r2 = _mm_loadu_ps(row2 + x); 506 v_r0 = _mm_loadu_ps(row0 + x + 4); 507 v_r1 = _mm_loadu_ps(row1 + x + 4); 508 v_r2 = _mm_loadu_ps(row2 + x + 4); [all...] |
/external/opencv3/modules/hal/src/ |
stat.cpp | 193 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j)); 194 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4)); 229 __m128 t0 = _mm_sub_ps(_mm_loadu_ps(a + j), _mm_loadu_ps(b + j)); 230 __m128 t1 = _mm_sub_ps(_mm_loadu_ps(a + j + 4), _mm_loadu_ps(b + j + 4));
|
mathfuncs.cpp | 85 __m128 x = _mm_loadu_ps(X + i), y = _mm_loadu_ps(Y + i); 479 xf0 = _mm_loadu_ps(&x[i].f); 480 xf1 = _mm_loadu_ps(&x[i+4].f); [all...] |
/external/webrtc/webrtc/common_audio/resampler/ |
sinc_resampler_sse.cc | 31 m_input = _mm_loadu_ps(input_ptr + i);
|
/external/opencv3/modules/video/src/ |
optflowgf.cpp | 468 s0 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x), g4); 469 s1 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x + 4), g4); 470 s2 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x + 8), g4); 471 s3 = _mm_mul_ps(_mm_loadu_ps(sptr0 + x + 12), g4); 478 x0 = _mm_add_ps(_mm_loadu_ps(sptr0 + x), _mm_loadu_ps(sptr1 + x)); 479 x1 = _mm_add_ps(_mm_loadu_ps(sptr0 + x + 4), _mm_loadu_ps(sptr1 + x + 4)); 482 x0 = _mm_add_ps(_mm_loadu_ps(sptr0 + x + 8), _mm_loadu_ps(sptr1 + x + 8)) [all...] |
/external/webrtc/webrtc/common_audio/ |
fir_filter_sse.cc | 63 m_in = _mm_loadu_ps(in_ptr + j);
|
/external/eigen/Eigen/src/Core/arch/SSE/ |
PacketMath.h | 223 return _mm_loadu_ps(from); 251 // NOTE Some version of MSVC10 generates bad code when using _mm_loadu_ps 253 // TODO On most architectures this version should also be faster than a single _mm_loadu_ps 259 return _mm_loadu_ps(from); 286 return _mm_loadu_ps(from);
|
/external/opencv3/modules/objdetect/src/ |
hog.cpp | 421 __m128 _mag = _mm_loadu_ps(dbuf + x + (width << 1)); 422 __m128 _angle = _mm_loadu_ps(dbuf + x + width * 3); 826 __m128 _w = _mm_mul_ps(_mm_set1_ps(pk.gradWeight), _mm_loadu_ps(pk.histWeights)); 874 __m128 _w = _mm_mul_ps(_mm_set1_ps(pk.gradWeight), _mm_loadu_ps(pk.histWeights)); [all...] |
/external/opencv3/modules/core/src/ |
mathfuncs.cpp | 280 __m128 v_src = _mm_loadu_ps(buf[0] + k); 417 __m128 v_src = _mm_loadu_ps(buf[0] + k); 510 __m128 v_angle = _mm_loadu_ps(angle + i); 701 __m128 v_m = _mm_loadu_ps(mag + k); 702 _mm_storeu_ps(x + k, _mm_mul_ps(_mm_loadu_ps(x + k), v_m)); 703 _mm_storeu_ps(y + k, _mm_mul_ps(_mm_loadu_ps(y + k), v_m)); [all...] |
rand.cpp | 232 __m128 q0 = _mm_loadu_ps((const float*)(p + i)); 233 __m128 q1 = _mm_loadu_ps((const float*)(p + i + 2)); 241 _mm_storeu_ps(arr + i, _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(f), p0), p1));
|
/frameworks/rs/cpu_ref/ |
rsCpuIntrinsics_x86.cpp | 387 pf = _mm_mul_ps(g0, _mm_loadu_ps(pi)); 390 gx = _mm_loadu_ps((const float *)gptr + r); 391 p0 = _mm_loadu_ps(pi + r); 392 p1 = _mm_loadu_ps(pi + r + 4); [all...] |
/prebuilts/gcc/linux-x86/host/x86_64-w64-mingw32-4.8/x86_64-w64-mingw32/include/ |
fvec.h | 118 inline void loadu(F32vec4 &a,float *p) { a = _mm_loadu_ps(p); }
|