1 /* 2 * Copyright 2015 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #include "Sk4px.h" 9 #include "SkNx.h" 10 #include "SkRandom.h" 11 #include "Test.h" 12 13 template <int N> 14 static void test_Nf(skiatest::Reporter* r) { 15 16 auto assert_nearly_eq = [&](float eps, const SkNx<N, float>& v, 17 float a, float b, float c, float d) { 18 auto close = [=](float a, float b) { return fabsf(a-b) <= eps; }; 19 float vals[4]; 20 v.store(vals); 21 bool ok = close(vals[0], a) && close(vals[1], b) 22 && close( v[0], a) && close( v[1], b); 23 REPORTER_ASSERT(r, ok); 24 if (N == 4) { 25 ok = close(vals[2], c) && close(vals[3], d) 26 && close( v[2], c) && close( v[3], d); 27 REPORTER_ASSERT(r, ok); 28 } 29 }; 30 auto assert_eq = [&](const SkNx<N, float>& v, float a, float b, float c, float d) { 31 return assert_nearly_eq(0, v, a,b,c,d); 32 }; 33 34 float vals[] = {3, 4, 5, 6}; 35 SkNx<N,float> a = SkNx<N,float>::Load(vals), 36 b(a), 37 c = a; 38 SkNx<N,float> d; 39 d = a; 40 41 assert_eq(a, 3, 4, 5, 6); 42 assert_eq(b, 3, 4, 5, 6); 43 assert_eq(c, 3, 4, 5, 6); 44 assert_eq(d, 3, 4, 5, 6); 45 46 assert_eq(a+b, 6, 8, 10, 12); 47 assert_eq(a*b, 9, 16, 25, 36); 48 assert_eq(a*b-b, 6, 12, 20, 30); 49 assert_eq((a*b).sqrt(), 3, 4, 5, 6); 50 assert_eq(a/b, 1, 1, 1, 1); 51 assert_eq(SkNx<N,float>(0)-a, -3, -4, -5, -6); 52 53 SkNx<N,float> fours(4); 54 55 assert_eq(fours.sqrt(), 2,2,2,2); 56 assert_nearly_eq(0.001f, fours.rsqrt(), 0.5, 0.5, 0.5, 0.5); 57 58 assert_nearly_eq(0.001f, fours.invert(), 0.25, 0.25, 0.25, 0.25); 59 60 assert_eq(SkNx<N,float>::Min(a, fours), 3, 4, 4, 4); 61 assert_eq(SkNx<N,float>::Max(a, fours), 4, 4, 5, 6); 62 63 // Test some comparisons. This is not exhaustive. 64 REPORTER_ASSERT(r, (a == b).allTrue()); 65 REPORTER_ASSERT(r, (a+b == a*b-b).anyTrue()); 66 REPORTER_ASSERT(r, !(a+b == a*b-b).allTrue()); 67 REPORTER_ASSERT(r, !(a+b == a*b).anyTrue()); 68 REPORTER_ASSERT(r, !(a != b).anyTrue()); 69 REPORTER_ASSERT(r, (a < fours).anyTrue()); 70 REPORTER_ASSERT(r, (a <= fours).anyTrue()); 71 REPORTER_ASSERT(r, !(a > fours).allTrue()); 72 REPORTER_ASSERT(r, !(a >= fours).allTrue()); 73 } 74 75 DEF_TEST(SkNf, r) { 76 test_Nf<2>(r); 77 test_Nf<4>(r); 78 } 79 80 template <int N, typename T> 81 void test_Ni(skiatest::Reporter* r) { 82 auto assert_eq = [&](const SkNx<N,T>& v, T a, T b, T c, T d, T e, T f, T g, T h) { 83 T vals[8]; 84 v.store(vals); 85 86 switch (N) { 87 case 8: REPORTER_ASSERT(r, vals[4] == e && vals[5] == f && vals[6] == g && vals[7] == h); 88 case 4: REPORTER_ASSERT(r, vals[2] == c && vals[3] == d); 89 case 2: REPORTER_ASSERT(r, vals[0] == a && vals[1] == b); 90 } 91 switch (N) { 92 case 8: REPORTER_ASSERT(r, v[4] == e && v[5] == f && 93 v[6] == g && v[7] == h); 94 case 4: REPORTER_ASSERT(r, v[2] == c && v[3] == d); 95 case 2: REPORTER_ASSERT(r, v[0] == a && v[1] == b); 96 } 97 }; 98 99 T vals[] = { 1,2,3,4,5,6,7,8 }; 100 SkNx<N,T> a = SkNx<N,T>::Load(vals), 101 b(a), 102 c = a; 103 SkNx<N,T> d; 104 d = a; 105 106 assert_eq(a, 1,2,3,4,5,6,7,8); 107 assert_eq(b, 1,2,3,4,5,6,7,8); 108 assert_eq(c, 1,2,3,4,5,6,7,8); 109 assert_eq(d, 1,2,3,4,5,6,7,8); 110 111 assert_eq(a+a, 2,4,6,8,10,12,14,16); 112 assert_eq(a*a, 1,4,9,16,25,36,49,64); 113 assert_eq(a*a-a, 0,2,6,12,20,30,42,56); 114 115 assert_eq(a >> 2, 0,0,0,1,1,1,1,2); 116 assert_eq(a << 1, 2,4,6,8,10,12,14,16); 117 118 REPORTER_ASSERT(r, a[1] == 2); 119 } 120 121 DEF_TEST(SkNx, r) { 122 test_Ni<2, uint16_t>(r); 123 test_Ni<4, uint16_t>(r); 124 test_Ni<8, uint16_t>(r); 125 126 test_Ni<2, int>(r); 127 test_Ni<4, int>(r); 128 test_Ni<8, int>(r); 129 } 130 131 DEF_TEST(SkNi_min_lt, r) { 132 // Exhaustively check the 8x8 bit space. 133 for (int a = 0; a < (1<<8); a++) { 134 for (int b = 0; b < (1<<8); b++) { 135 Sk16b aw(a), bw(b); 136 REPORTER_ASSERT(r, Sk16b::Min(aw, bw)[0] == SkTMin(a, b)); 137 REPORTER_ASSERT(r, !(aw < bw)[0] == !(a < b)); 138 }} 139 140 // Exhausting the 16x16 bit space is kind of slow, so only do that in release builds. 141 #ifdef SK_DEBUG 142 SkRandom rand; 143 for (int i = 0; i < (1<<16); i++) { 144 uint16_t a = rand.nextU() >> 16, 145 b = rand.nextU() >> 16; 146 REPORTER_ASSERT(r, Sk16h::Min(Sk16h(a), Sk16h(b))[0] == SkTMin(a, b)); 147 } 148 #else 149 for (int a = 0; a < (1<<16); a++) { 150 for (int b = 0; b < (1<<16); b++) { 151 REPORTER_ASSERT(r, Sk16h::Min(Sk16h(a), Sk16h(b))[0] == SkTMin(a, b)); 152 }} 153 #endif 154 } 155 156 DEF_TEST(SkNi_saturatedAdd, r) { 157 for (int a = 0; a < (1<<8); a++) { 158 for (int b = 0; b < (1<<8); b++) { 159 int exact = a+b; 160 if (exact > 255) { exact = 255; } 161 if (exact < 0) { exact = 0; } 162 163 REPORTER_ASSERT(r, Sk16b(a).saturatedAdd(Sk16b(b))[0] == exact); 164 } 165 } 166 } 167 168 DEF_TEST(SkNi_mulHi, r) { 169 // First 8 primes. 170 Sk4u a{ 0x00020000, 0x00030000, 0x00050000, 0x00070000 }; 171 Sk4u b{ 0x000b0000, 0x000d0000, 0x00110000, 0x00130000 }; 172 173 Sk4u q{22, 39, 85, 133}; 174 175 Sk4u c = a.mulHi(b); 176 REPORTER_ASSERT(r, c[0] == q[0]); 177 REPORTER_ASSERT(r, c[1] == q[1]); 178 REPORTER_ASSERT(r, c[2] == q[2]); 179 REPORTER_ASSERT(r, c[3] == q[3]); 180 } 181 182 DEF_TEST(Sk4px_muldiv255round, r) { 183 for (int a = 0; a < (1<<8); a++) { 184 for (int b = 0; b < (1<<8); b++) { 185 int exact = (a*b+127)/255; 186 187 // Duplicate a and b 16x each. 188 auto av = Sk4px::DupAlpha(a), 189 bv = Sk4px::DupAlpha(b); 190 191 // This way should always be exactly correct. 192 int correct = (av * bv).div255()[0]; 193 REPORTER_ASSERT(r, correct == exact); 194 195 // We're a bit more flexible on this method: correct for 0 or 255, otherwise off by <=1. 196 int fast = av.approxMulDiv255(bv)[0]; 197 REPORTER_ASSERT(r, fast-exact >= -1 && fast-exact <= 1); 198 if (a == 0 || a == 255 || b == 0 || b == 255) { 199 REPORTER_ASSERT(r, fast == exact); 200 } 201 } 202 } 203 } 204 205 DEF_TEST(Sk4px_widening, r) { 206 SkPMColor colors[] = { 207 SkPreMultiplyColor(0xff00ff00), 208 SkPreMultiplyColor(0x40008000), 209 SkPreMultiplyColor(0x7f020406), 210 SkPreMultiplyColor(0x00000000), 211 }; 212 auto packed = Sk4px::Load4(colors); 213 214 auto wideLo = packed.widenLo(), 215 wideHi = packed.widenHi(), 216 wideLoHi = packed.widenLoHi(), 217 wideLoHiAlt = wideLo + wideHi; 218 REPORTER_ASSERT(r, 0 == memcmp(&wideLoHi, &wideLoHiAlt, sizeof(wideLoHi))); 219 } 220 221 DEF_TEST(SkNx_abs, r) { 222 auto fs = Sk4f(0.0f, -0.0f, 2.0f, -4.0f).abs(); 223 REPORTER_ASSERT(r, fs[0] == 0.0f); 224 REPORTER_ASSERT(r, fs[1] == 0.0f); 225 REPORTER_ASSERT(r, fs[2] == 2.0f); 226 REPORTER_ASSERT(r, fs[3] == 4.0f); 227 auto fshi = Sk2f(0.0f, -0.0f).abs(); 228 auto fslo = Sk2f(2.0f, -4.0f).abs(); 229 REPORTER_ASSERT(r, fshi[0] == 0.0f); 230 REPORTER_ASSERT(r, fshi[1] == 0.0f); 231 REPORTER_ASSERT(r, fslo[0] == 2.0f); 232 REPORTER_ASSERT(r, fslo[1] == 4.0f); 233 } 234 235 DEF_TEST(Sk4i_abs, r) { 236 auto is = Sk4i(0, -1, 2, -2147483647).abs(); 237 REPORTER_ASSERT(r, is[0] == 0); 238 REPORTER_ASSERT(r, is[1] == 1); 239 REPORTER_ASSERT(r, is[2] == 2); 240 REPORTER_ASSERT(r, is[3] == 2147483647); 241 } 242 243 DEF_TEST(Sk4i_minmax, r) { 244 auto a = Sk4i(0, 2, 4, 6); 245 auto b = Sk4i(1, 1, 3, 7); 246 auto min = Sk4i::Min(a, b); 247 auto max = Sk4i::Max(a, b); 248 for(int i = 0; i < 4; ++i) { 249 REPORTER_ASSERT(r, min[i] == SkTMin(a[i], b[i])); 250 REPORTER_ASSERT(r, max[i] == SkTMax(a[i], b[i])); 251 } 252 } 253 254 DEF_TEST(SkNx_floor, r) { 255 auto fs = Sk4f(0.4f, -0.4f, 0.6f, -0.6f).floor(); 256 REPORTER_ASSERT(r, fs[0] == 0.0f); 257 REPORTER_ASSERT(r, fs[1] == -1.0f); 258 REPORTER_ASSERT(r, fs[2] == 0.0f); 259 REPORTER_ASSERT(r, fs[3] == -1.0f); 260 } 261 262 DEF_TEST(SkNx_shuffle, r) { 263 Sk4f f4(0,10,20,30); 264 265 Sk2f f2 = SkNx_shuffle<2,1>(f4); 266 REPORTER_ASSERT(r, f2[0] == 20); 267 REPORTER_ASSERT(r, f2[1] == 10); 268 269 f4 = SkNx_shuffle<0,1,1,0>(f2); 270 REPORTER_ASSERT(r, f4[0] == 20); 271 REPORTER_ASSERT(r, f4[1] == 10); 272 REPORTER_ASSERT(r, f4[2] == 10); 273 REPORTER_ASSERT(r, f4[3] == 20); 274 } 275 276 DEF_TEST(SkNx_int_float, r) { 277 Sk4f f(-2.3f, 1.0f, 0.45f, 0.6f); 278 279 Sk4i i = SkNx_cast<int>(f); 280 REPORTER_ASSERT(r, i[0] == -2); 281 REPORTER_ASSERT(r, i[1] == 1); 282 REPORTER_ASSERT(r, i[2] == 0); 283 REPORTER_ASSERT(r, i[3] == 0); 284 285 f = SkNx_cast<float>(i); 286 REPORTER_ASSERT(r, f[0] == -2.0f); 287 REPORTER_ASSERT(r, f[1] == 1.0f); 288 REPORTER_ASSERT(r, f[2] == 0.0f); 289 REPORTER_ASSERT(r, f[3] == 0.0f); 290 } 291 292 #include "SkRandom.h" 293 294 DEF_TEST(SkNx_u16_float, r) { 295 { 296 // u16 --> float 297 auto h4 = Sk4h(15, 17, 257, 65535); 298 auto f4 = SkNx_cast<float>(h4); 299 REPORTER_ASSERT(r, f4[0] == 15.0f); 300 REPORTER_ASSERT(r, f4[1] == 17.0f); 301 REPORTER_ASSERT(r, f4[2] == 257.0f); 302 REPORTER_ASSERT(r, f4[3] == 65535.0f); 303 } 304 { 305 // float -> u16 306 auto f4 = Sk4f(15, 17, 257, 65535); 307 auto h4 = SkNx_cast<uint16_t>(f4); 308 REPORTER_ASSERT(r, h4[0] == 15); 309 REPORTER_ASSERT(r, h4[1] == 17); 310 REPORTER_ASSERT(r, h4[2] == 257); 311 REPORTER_ASSERT(r, h4[3] == 65535); 312 } 313 314 // starting with any u16 value, we should be able to have a perfect round-trip in/out of floats 315 // 316 SkRandom rand; 317 for (int i = 0; i < 10000; ++i) { 318 const uint16_t s16[4] { 319 (uint16_t)rand.nextU16(), (uint16_t)rand.nextU16(), 320 (uint16_t)rand.nextU16(), (uint16_t)rand.nextU16(), 321 }; 322 auto u4_0 = Sk4h::Load(s16); 323 auto f4 = SkNx_cast<float>(u4_0); 324 auto u4_1 = SkNx_cast<uint16_t>(f4); 325 uint16_t d16[4]; 326 u4_1.store(d16); 327 REPORTER_ASSERT(r, !memcmp(s16, d16, sizeof(s16))); 328 } 329 } 330 331 // The SSE2 implementation of SkNx_cast<uint16_t>(Sk4i) is non-trivial, so worth a test. 332 DEF_TEST(SkNx_int_u16, r) { 333 // These are pretty hard to get wrong. 334 for (int i = 0; i <= 0x7fff; i++) { 335 uint16_t expected = (uint16_t)i; 336 uint16_t actual = SkNx_cast<uint16_t>(Sk4i(i))[0]; 337 338 REPORTER_ASSERT(r, expected == actual); 339 } 340 341 // A naive implementation with _mm_packs_epi32 would succeed up to 0x7fff but fail here: 342 for (int i = 0x8000; (1) && i <= 0xffff; i++) { 343 uint16_t expected = (uint16_t)i; 344 uint16_t actual = SkNx_cast<uint16_t>(Sk4i(i))[0]; 345 346 REPORTER_ASSERT(r, expected == actual); 347 } 348 } 349 350 DEF_TEST(SkNx_4fLoad4Store4, r) { 351 float src[] = { 352 0.0f, 1.0f, 2.0f, 3.0f, 353 4.0f, 5.0f, 6.0f, 7.0f, 354 8.0f, 9.0f, 10.0f, 11.0f, 355 12.0f, 13.0f, 14.0f, 15.0f 356 }; 357 358 Sk4f a, b, c, d; 359 Sk4f::Load4(src, &a, &b, &c, &d); 360 REPORTER_ASSERT(r, 0.0f == a[0]); 361 REPORTER_ASSERT(r, 4.0f == a[1]); 362 REPORTER_ASSERT(r, 8.0f == a[2]); 363 REPORTER_ASSERT(r, 12.0f == a[3]); 364 REPORTER_ASSERT(r, 1.0f == b[0]); 365 REPORTER_ASSERT(r, 5.0f == b[1]); 366 REPORTER_ASSERT(r, 9.0f == b[2]); 367 REPORTER_ASSERT(r, 13.0f == b[3]); 368 REPORTER_ASSERT(r, 2.0f == c[0]); 369 REPORTER_ASSERT(r, 6.0f == c[1]); 370 REPORTER_ASSERT(r, 10.0f == c[2]); 371 REPORTER_ASSERT(r, 14.0f == c[3]); 372 REPORTER_ASSERT(r, 3.0f == d[0]); 373 REPORTER_ASSERT(r, 7.0f == d[1]); 374 REPORTER_ASSERT(r, 11.0f == d[2]); 375 REPORTER_ASSERT(r, 15.0f == d[3]); 376 377 float dst[16]; 378 Sk4f::Store4(dst, a, b, c, d); 379 REPORTER_ASSERT(r, 0 == memcmp(dst, src, 16 * sizeof(float))); 380 } 381 382 DEF_TEST(SkNx_neg, r) { 383 auto fs = -Sk4f(0.0f, -0.0f, 2.0f, -4.0f); 384 REPORTER_ASSERT(r, fs[0] == 0.0f); 385 REPORTER_ASSERT(r, fs[1] == 0.0f); 386 REPORTER_ASSERT(r, fs[2] == -2.0f); 387 REPORTER_ASSERT(r, fs[3] == 4.0f); 388 auto fshi = -Sk2f(0.0f, -0.0f); 389 auto fslo = -Sk2f(2.0f, -4.0f); 390 REPORTER_ASSERT(r, fshi[0] == 0.0f); 391 REPORTER_ASSERT(r, fshi[1] == 0.0f); 392 REPORTER_ASSERT(r, fslo[0] == -2.0f); 393 REPORTER_ASSERT(r, fslo[1] == 4.0f); 394 } 395 396 DEF_TEST(SkNx_thenElse, r) { 397 auto fs = (Sk4f(0.0f, -0.0f, 2.0f, -4.0f) < 0).thenElse(-1, 1); 398 REPORTER_ASSERT(r, fs[0] == 1); 399 REPORTER_ASSERT(r, fs[1] == 1); 400 REPORTER_ASSERT(r, fs[2] == 1); 401 REPORTER_ASSERT(r, fs[3] == -1); 402 auto fshi = (Sk2f(0.0f, -0.0f) < 0).thenElse(-1, 1); 403 auto fslo = (Sk2f(2.0f, -4.0f) < 0).thenElse(-1, 1); 404 REPORTER_ASSERT(r, fshi[0] == 1); 405 REPORTER_ASSERT(r, fshi[1] == 1); 406 REPORTER_ASSERT(r, fslo[0] == 1); 407 REPORTER_ASSERT(r, fslo[1] == -1); 408 } 409 410 DEF_TEST(Sk4f_Load2, r) { 411 float xy[8] = { 0,1,2,3,4,5,6,7 }; 412 413 Sk4f x,y; 414 Sk4f::Load2(xy, &x,&y); 415 416 REPORTER_ASSERT(r, x[0] == 0); 417 REPORTER_ASSERT(r, x[1] == 2); 418 REPORTER_ASSERT(r, x[2] == 4); 419 REPORTER_ASSERT(r, x[3] == 6); 420 421 REPORTER_ASSERT(r, y[0] == 1); 422 REPORTER_ASSERT(r, y[1] == 3); 423 REPORTER_ASSERT(r, y[2] == 5); 424 REPORTER_ASSERT(r, y[3] == 7); 425 } 426 427 DEF_TEST(Sk2f_Store3, r) { 428 Sk2f p0{0, 3}; 429 Sk2f p1{1, 4}; 430 Sk2f p2{2, 5}; 431 float dst[6]; 432 Sk2f::Store3(dst, p0, p1, p2); 433 REPORTER_ASSERT(r, dst[0] == 0); 434 REPORTER_ASSERT(r, dst[1] == 1); 435 REPORTER_ASSERT(r, dst[2] == 2); 436 REPORTER_ASSERT(r, dst[3] == 3); 437 REPORTER_ASSERT(r, dst[4] == 4); 438 REPORTER_ASSERT(r, dst[5] == 5); 439 } 440