1 /* 2 * ARM NEON vector operations. 3 * 4 * Copyright (c) 2007, 2008 CodeSourcery. 5 * Written by Paul Brook 6 * 7 * This code is licenced under the GNU GPL v2. 8 */ 9 #include <stdlib.h> 10 #include <stdio.h> 11 12 #include "cpu.h" 13 #include "exec-all.h" 14 #include "helpers.h" 15 16 #define SIGNBIT (uint32_t)0x80000000 17 #define SIGNBIT64 ((uint64_t)1 << 63) 18 19 #define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] = CPSR_Q 20 21 static float_status neon_float_status; 22 #define NFS &neon_float_status 23 24 /* Helper routines to perform bitwise copies between float and int. */ 25 static inline float32 vfp_itos(uint32_t i) 26 { 27 union { 28 uint32_t i; 29 float32 s; 30 } v; 31 32 v.i = i; 33 return v.s; 34 } 35 36 static inline uint32_t vfp_stoi(float32 s) 37 { 38 union { 39 uint32_t i; 40 float32 s; 41 } v; 42 43 v.s = s; 44 return v.i; 45 } 46 47 #define NEON_TYPE1(name, type) \ 48 typedef struct \ 49 { \ 50 type v1; \ 51 } neon_##name; 52 #ifdef HOST_WORDS_BIGENDIAN 53 #define NEON_TYPE2(name, type) \ 54 typedef struct \ 55 { \ 56 type v2; \ 57 type v1; \ 58 } neon_##name; 59 #define NEON_TYPE4(name, type) \ 60 typedef struct \ 61 { \ 62 type v4; \ 63 type v3; \ 64 type v2; \ 65 type v1; \ 66 } neon_##name; 67 #else 68 #define NEON_TYPE2(name, type) \ 69 typedef struct \ 70 { \ 71 type v1; \ 72 type v2; \ 73 } neon_##name; 74 #define NEON_TYPE4(name, type) \ 75 typedef struct \ 76 { \ 77 type v1; \ 78 type v2; \ 79 type v3; \ 80 type v4; \ 81 } neon_##name; 82 #endif 83 84 NEON_TYPE4(s8, int8_t) 85 NEON_TYPE4(u8, uint8_t) 86 NEON_TYPE2(s16, int16_t) 87 NEON_TYPE2(u16, uint16_t) 88 NEON_TYPE1(s32, int32_t) 89 NEON_TYPE1(u32, uint32_t) 90 #undef NEON_TYPE4 91 #undef NEON_TYPE2 92 #undef NEON_TYPE1 93 94 /* Copy from a uint32_t to a vector structure type. */ 95 #define NEON_UNPACK(vtype, dest, val) do { \ 96 union { \ 97 vtype v; \ 98 uint32_t i; \ 99 } conv_u; \ 100 conv_u.i = (val); \ 101 dest = conv_u.v; \ 102 } while(0) 103 104 /* Copy from a vector structure type to a uint32_t. */ 105 #define NEON_PACK(vtype, dest, val) do { \ 106 union { \ 107 vtype v; \ 108 uint32_t i; \ 109 } conv_u; \ 110 conv_u.v = (val); \ 111 dest = conv_u.i; \ 112 } while(0) 113 114 #define NEON_DO1 \ 115 NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); 116 #define NEON_DO2 \ 117 NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \ 118 NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); 119 #define NEON_DO4 \ 120 NEON_FN(vdest.v1, vsrc1.v1, vsrc2.v1); \ 121 NEON_FN(vdest.v2, vsrc1.v2, vsrc2.v2); \ 122 NEON_FN(vdest.v3, vsrc1.v3, vsrc2.v3); \ 123 NEON_FN(vdest.v4, vsrc1.v4, vsrc2.v4); 124 125 #define NEON_VOP_BODY(vtype, n) \ 126 { \ 127 uint32_t res; \ 128 vtype vsrc1; \ 129 vtype vsrc2; \ 130 vtype vdest; \ 131 NEON_UNPACK(vtype, vsrc1, arg1); \ 132 NEON_UNPACK(vtype, vsrc2, arg2); \ 133 NEON_DO##n; \ 134 NEON_PACK(vtype, res, vdest); \ 135 return res; \ 136 } 137 138 #define NEON_VOP(name, vtype, n) \ 139 uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \ 140 NEON_VOP_BODY(vtype, n) 141 142 #define NEON_VOP_ENV(name, vtype, n) \ 143 uint32_t HELPER(glue(neon_,name))(CPUState *env, uint32_t arg1, uint32_t arg2) \ 144 NEON_VOP_BODY(vtype, n) 145 146 /* Pairwise operations. */ 147 /* For 32-bit elements each segment only contains a single element, so 148 the elementwise and pairwise operations are the same. */ 149 #define NEON_PDO2 \ 150 NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \ 151 NEON_FN(vdest.v2, vsrc2.v1, vsrc2.v2); 152 #define NEON_PDO4 \ 153 NEON_FN(vdest.v1, vsrc1.v1, vsrc1.v2); \ 154 NEON_FN(vdest.v2, vsrc1.v3, vsrc1.v4); \ 155 NEON_FN(vdest.v3, vsrc2.v1, vsrc2.v2); \ 156 NEON_FN(vdest.v4, vsrc2.v3, vsrc2.v4); \ 157 158 #define NEON_POP(name, vtype, n) \ 159 uint32_t HELPER(glue(neon_,name))(uint32_t arg1, uint32_t arg2) \ 160 { \ 161 uint32_t res; \ 162 vtype vsrc1; \ 163 vtype vsrc2; \ 164 vtype vdest; \ 165 NEON_UNPACK(vtype, vsrc1, arg1); \ 166 NEON_UNPACK(vtype, vsrc2, arg2); \ 167 NEON_PDO##n; \ 168 NEON_PACK(vtype, res, vdest); \ 169 return res; \ 170 } 171 172 /* Unary operators. */ 173 #define NEON_VOP1(name, vtype, n) \ 174 uint32_t HELPER(glue(neon_,name))(uint32_t arg) \ 175 { \ 176 vtype vsrc1; \ 177 vtype vdest; \ 178 NEON_UNPACK(vtype, vsrc1, arg); \ 179 NEON_DO##n; \ 180 NEON_PACK(vtype, arg, vdest); \ 181 return arg; \ 182 } 183 184 185 #define NEON_USAT(dest, src1, src2, type) do { \ 186 uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ 187 if (tmp != (type)tmp) { \ 188 SET_QC(); \ 189 dest = ~0; \ 190 } else { \ 191 dest = tmp; \ 192 }} while(0) 193 #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) 194 NEON_VOP_ENV(qadd_u8, neon_u8, 4) 195 #undef NEON_FN 196 #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) 197 NEON_VOP_ENV(qadd_u16, neon_u16, 2) 198 #undef NEON_FN 199 #undef NEON_USAT 200 201 #define NEON_SSAT(dest, src1, src2, type) do { \ 202 int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ 203 if (tmp != (type)tmp) { \ 204 SET_QC(); \ 205 if (src2 > 0) { \ 206 tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ 207 } else { \ 208 tmp = 1 << (sizeof(type) * 8 - 1); \ 209 } \ 210 } \ 211 dest = tmp; \ 212 } while(0) 213 #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) 214 NEON_VOP_ENV(qadd_s8, neon_s8, 4) 215 #undef NEON_FN 216 #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) 217 NEON_VOP_ENV(qadd_s16, neon_s16, 2) 218 #undef NEON_FN 219 #undef NEON_SSAT 220 221 #define NEON_USAT(dest, src1, src2, type) do { \ 222 uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ 223 if (tmp != (type)tmp) { \ 224 SET_QC(); \ 225 dest = 0; \ 226 } else { \ 227 dest = tmp; \ 228 }} while(0) 229 #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) 230 NEON_VOP_ENV(qsub_u8, neon_u8, 4) 231 #undef NEON_FN 232 #define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) 233 NEON_VOP_ENV(qsub_u16, neon_u16, 2) 234 #undef NEON_FN 235 #undef NEON_USAT 236 237 #define NEON_SSAT(dest, src1, src2, type) do { \ 238 int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ 239 if (tmp != (type)tmp) { \ 240 SET_QC(); \ 241 if (src2 < 0) { \ 242 tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ 243 } else { \ 244 tmp = 1 << (sizeof(type) * 8 - 1); \ 245 } \ 246 } \ 247 dest = tmp; \ 248 } while(0) 249 #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) 250 NEON_VOP_ENV(qsub_s8, neon_s8, 4) 251 #undef NEON_FN 252 #define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) 253 NEON_VOP_ENV(qsub_s16, neon_s16, 2) 254 #undef NEON_FN 255 #undef NEON_SSAT 256 257 #define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 258 NEON_VOP(hadd_s8, neon_s8, 4) 259 NEON_VOP(hadd_u8, neon_u8, 4) 260 NEON_VOP(hadd_s16, neon_s16, 2) 261 NEON_VOP(hadd_u16, neon_u16, 2) 262 #undef NEON_FN 263 264 int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2) 265 { 266 int32_t dest; 267 268 dest = (src1 >> 1) + (src2 >> 1); 269 if (src1 & src2 & 1) 270 dest++; 271 return dest; 272 } 273 274 uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2) 275 { 276 uint32_t dest; 277 278 dest = (src1 >> 1) + (src2 >> 1); 279 if (src1 & src2 & 1) 280 dest++; 281 return dest; 282 } 283 284 #define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1 285 NEON_VOP(rhadd_s8, neon_s8, 4) 286 NEON_VOP(rhadd_u8, neon_u8, 4) 287 NEON_VOP(rhadd_s16, neon_s16, 2) 288 NEON_VOP(rhadd_u16, neon_u16, 2) 289 #undef NEON_FN 290 291 int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2) 292 { 293 int32_t dest; 294 295 dest = (src1 >> 1) + (src2 >> 1); 296 if ((src1 | src2) & 1) 297 dest++; 298 return dest; 299 } 300 301 uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2) 302 { 303 uint32_t dest; 304 305 dest = (src1 >> 1) + (src2 >> 1); 306 if ((src1 | src2) & 1) 307 dest++; 308 return dest; 309 } 310 311 #define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1 312 NEON_VOP(hsub_s8, neon_s8, 4) 313 NEON_VOP(hsub_u8, neon_u8, 4) 314 NEON_VOP(hsub_s16, neon_s16, 2) 315 NEON_VOP(hsub_u16, neon_u16, 2) 316 #undef NEON_FN 317 318 int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2) 319 { 320 int32_t dest; 321 322 dest = (src1 >> 1) - (src2 >> 1); 323 if ((~src1) & src2 & 1) 324 dest--; 325 return dest; 326 } 327 328 uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) 329 { 330 uint32_t dest; 331 332 dest = (src1 >> 1) - (src2 >> 1); 333 if ((~src1) & src2 & 1) 334 dest--; 335 return dest; 336 } 337 338 #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? ~0 : 0 339 NEON_VOP(cgt_s8, neon_s8, 4) 340 NEON_VOP(cgt_u8, neon_u8, 4) 341 NEON_VOP(cgt_s16, neon_s16, 2) 342 NEON_VOP(cgt_u16, neon_u16, 2) 343 NEON_VOP(cgt_s32, neon_s32, 1) 344 NEON_VOP(cgt_u32, neon_u32, 1) 345 #undef NEON_FN 346 347 #define NEON_FN(dest, src1, src2) dest = (src1 >= src2) ? ~0 : 0 348 NEON_VOP(cge_s8, neon_s8, 4) 349 NEON_VOP(cge_u8, neon_u8, 4) 350 NEON_VOP(cge_s16, neon_s16, 2) 351 NEON_VOP(cge_u16, neon_u16, 2) 352 NEON_VOP(cge_s32, neon_s32, 1) 353 NEON_VOP(cge_u32, neon_u32, 1) 354 #undef NEON_FN 355 356 #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 357 NEON_VOP(min_s8, neon_s8, 4) 358 NEON_VOP(min_u8, neon_u8, 4) 359 NEON_VOP(min_s16, neon_s16, 2) 360 NEON_VOP(min_u16, neon_u16, 2) 361 NEON_VOP(min_s32, neon_s32, 1) 362 NEON_VOP(min_u32, neon_u32, 1) 363 NEON_POP(pmin_s8, neon_s8, 4) 364 NEON_POP(pmin_u8, neon_u8, 4) 365 NEON_POP(pmin_s16, neon_s16, 2) 366 NEON_POP(pmin_u16, neon_u16, 2) 367 #undef NEON_FN 368 369 #define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2 370 NEON_VOP(max_s8, neon_s8, 4) 371 NEON_VOP(max_u8, neon_u8, 4) 372 NEON_VOP(max_s16, neon_s16, 2) 373 NEON_VOP(max_u16, neon_u16, 2) 374 NEON_VOP(max_s32, neon_s32, 1) 375 NEON_VOP(max_u32, neon_u32, 1) 376 NEON_POP(pmax_s8, neon_s8, 4) 377 NEON_POP(pmax_u8, neon_u8, 4) 378 NEON_POP(pmax_s16, neon_s16, 2) 379 NEON_POP(pmax_u16, neon_u16, 2) 380 #undef NEON_FN 381 382 #define NEON_FN(dest, src1, src2) \ 383 dest = (src1 > src2) ? (src1 - src2) : (src2 - src1) 384 NEON_VOP(abd_s8, neon_s8, 4) 385 NEON_VOP(abd_u8, neon_u8, 4) 386 NEON_VOP(abd_s16, neon_s16, 2) 387 NEON_VOP(abd_u16, neon_u16, 2) 388 NEON_VOP(abd_s32, neon_s32, 1) 389 NEON_VOP(abd_u32, neon_u32, 1) 390 #undef NEON_FN 391 392 #define NEON_FN(dest, src1, src2) do { \ 393 int8_t tmp; \ 394 tmp = (int8_t)src2; \ 395 if (tmp >= sizeof(src1) * 8 || tmp <= -sizeof(src1) * 8) { \ 396 dest = 0; \ 397 } else if (tmp < 0) { \ 398 dest = src1 >> -tmp; \ 399 } else { \ 400 dest = src1 << tmp; \ 401 }} while (0) 402 NEON_VOP(shl_u8, neon_u8, 4) 403 NEON_VOP(shl_u16, neon_u16, 2) 404 NEON_VOP(shl_u32, neon_u32, 1) 405 #undef NEON_FN 406 407 uint64_t HELPER(neon_shl_u64)(uint64_t val, uint64_t shiftop) 408 { 409 int8_t shift = (int8_t)shiftop; 410 if (shift >= 64 || shift <= -64) { 411 val = 0; 412 } else if (shift < 0) { 413 val >>= -shift; 414 } else { 415 val <<= shift; 416 } 417 return val; 418 } 419 420 #define NEON_FN(dest, src1, src2) do { \ 421 int8_t tmp; \ 422 tmp = (int8_t)src2; \ 423 if (tmp >= sizeof(src1) * 8) { \ 424 dest = 0; \ 425 } else if (tmp <= -sizeof(src1) * 8) { \ 426 dest = src1 >> (sizeof(src1) * 8 - 1); \ 427 } else if (tmp < 0) { \ 428 dest = src1 >> -tmp; \ 429 } else { \ 430 dest = src1 << tmp; \ 431 }} while (0) 432 NEON_VOP(shl_s8, neon_s8, 4) 433 NEON_VOP(shl_s16, neon_s16, 2) 434 NEON_VOP(shl_s32, neon_s32, 1) 435 #undef NEON_FN 436 437 uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) 438 { 439 int8_t shift = (int8_t)shiftop; 440 int64_t val = valop; 441 if (shift >= 64) { 442 val = 0; 443 } else if (shift <= -64) { 444 val >>= 63; 445 } else if (shift < 0) { 446 val >>= -shift; 447 } else { 448 val <<= shift; 449 } 450 return val; 451 } 452 453 #define NEON_FN(dest, src1, src2) do { \ 454 int8_t tmp; \ 455 tmp = (int8_t)src2; \ 456 if (tmp >= sizeof(src1) * 8) { \ 457 dest = 0; \ 458 } else if (tmp < -sizeof(src1) * 8) { \ 459 dest = src1 >> (sizeof(src1) * 8 - 1); \ 460 } else if (tmp == -sizeof(src1) * 8) { \ 461 dest = src1 >> (tmp - 1); \ 462 dest++; \ 463 dest >>= 1; \ 464 } else if (tmp < 0) { \ 465 dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ 466 } else { \ 467 dest = src1 << tmp; \ 468 }} while (0) 469 NEON_VOP(rshl_s8, neon_s8, 4) 470 NEON_VOP(rshl_s16, neon_s16, 2) 471 NEON_VOP(rshl_s32, neon_s32, 1) 472 #undef NEON_FN 473 474 uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) 475 { 476 int8_t shift = (int8_t)shiftop; 477 int64_t val = valop; 478 if (shift >= 64) { 479 val = 0; 480 } else if (shift < -64) { 481 val >>= 63; 482 } else if (shift == -63) { 483 val >>= 63; 484 val++; 485 val >>= 1; 486 } else if (shift < 0) { 487 val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; 488 } else { 489 val <<= shift; 490 } 491 return val; 492 } 493 494 #define NEON_FN(dest, src1, src2) do { \ 495 int8_t tmp; \ 496 tmp = (int8_t)src2; \ 497 if (tmp >= sizeof(src1) * 8 || tmp < -sizeof(src1) * 8) { \ 498 dest = 0; \ 499 } else if (tmp == -sizeof(src1) * 8) { \ 500 dest = src1 >> (tmp - 1); \ 501 } else if (tmp < 0) { \ 502 dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ 503 } else { \ 504 dest = src1 << tmp; \ 505 }} while (0) 506 NEON_VOP(rshl_u8, neon_u8, 4) 507 NEON_VOP(rshl_u16, neon_u16, 2) 508 NEON_VOP(rshl_u32, neon_u32, 1) 509 #undef NEON_FN 510 511 uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) 512 { 513 int8_t shift = (uint8_t)shiftop; 514 if (shift >= 64 || shift < 64) { 515 val = 0; 516 } else if (shift == -64) { 517 /* Rounding a 1-bit result just preserves that bit. */ 518 val >>= 63; 519 } if (shift < 0) { 520 val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; 521 val >>= -shift; 522 } else { 523 val <<= shift; 524 } 525 return val; 526 } 527 528 #define NEON_FN(dest, src1, src2) do { \ 529 int8_t tmp; \ 530 tmp = (int8_t)src2; \ 531 if (tmp >= sizeof(src1) * 8) { \ 532 if (src1) { \ 533 SET_QC(); \ 534 dest = ~0; \ 535 } else { \ 536 dest = 0; \ 537 } \ 538 } else if (tmp <= -sizeof(src1) * 8) { \ 539 dest = 0; \ 540 } else if (tmp < 0) { \ 541 dest = src1 >> -tmp; \ 542 } else { \ 543 dest = src1 << tmp; \ 544 if ((dest >> tmp) != src1) { \ 545 SET_QC(); \ 546 dest = ~0; \ 547 } \ 548 }} while (0) 549 NEON_VOP_ENV(qshl_u8, neon_u8, 4) 550 NEON_VOP_ENV(qshl_u16, neon_u16, 2) 551 NEON_VOP_ENV(qshl_u32, neon_u32, 1) 552 #undef NEON_FN 553 554 uint64_t HELPER(neon_qshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) 555 { 556 int8_t shift = (int8_t)shiftop; 557 if (shift >= 64) { 558 if (val) { 559 val = ~(uint64_t)0; 560 SET_QC(); 561 } else { 562 val = 0; 563 } 564 } else if (shift <= -64) { 565 val = 0; 566 } else if (shift < 0) { 567 val >>= -shift; 568 } else { 569 uint64_t tmp = val; 570 val <<= shift; 571 if ((val >> shift) != tmp) { 572 SET_QC(); 573 val = ~(uint64_t)0; 574 } 575 } 576 return val; 577 } 578 579 #define NEON_FN(dest, src1, src2) do { \ 580 int8_t tmp; \ 581 tmp = (int8_t)src2; \ 582 if (tmp >= sizeof(src1) * 8) { \ 583 if (src1) \ 584 SET_QC(); \ 585 dest = src1 >> 31; \ 586 } else if (tmp <= -sizeof(src1) * 8) { \ 587 dest = src1 >> 31; \ 588 } else if (tmp < 0) { \ 589 dest = src1 >> -tmp; \ 590 } else { \ 591 dest = src1 << tmp; \ 592 if ((dest >> tmp) != src1) { \ 593 SET_QC(); \ 594 dest = src2 >> 31; \ 595 } \ 596 }} while (0) 597 NEON_VOP_ENV(qshl_s8, neon_s8, 4) 598 NEON_VOP_ENV(qshl_s16, neon_s16, 2) 599 NEON_VOP_ENV(qshl_s32, neon_s32, 1) 600 #undef NEON_FN 601 602 uint64_t HELPER(neon_qshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) 603 { 604 int8_t shift = (uint8_t)shiftop; 605 int64_t val = valop; 606 if (shift >= 64) { 607 if (val) { 608 SET_QC(); 609 val = (val >> 63) & ~SIGNBIT64; 610 } 611 } else if (shift <= 64) { 612 val >>= 63; 613 } else if (shift < 0) { 614 val >>= -shift; 615 } else { 616 int64_t tmp = val; 617 val <<= shift; 618 if ((val >> shift) != tmp) { 619 SET_QC(); 620 val = (tmp >> 63) ^ ~SIGNBIT64; 621 } 622 } 623 return val; 624 } 625 626 627 /* FIXME: This is wrong. */ 628 #define NEON_FN(dest, src1, src2) do { \ 629 int8_t tmp; \ 630 tmp = (int8_t)src2; \ 631 if (tmp < 0) { \ 632 dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ 633 } else { \ 634 dest = src1 << tmp; \ 635 if ((dest >> tmp) != src1) { \ 636 SET_QC(); \ 637 dest = ~0; \ 638 } \ 639 }} while (0) 640 NEON_VOP_ENV(qrshl_u8, neon_u8, 4) 641 NEON_VOP_ENV(qrshl_u16, neon_u16, 2) 642 NEON_VOP_ENV(qrshl_u32, neon_u32, 1) 643 #undef NEON_FN 644 645 uint64_t HELPER(neon_qrshl_u64)(CPUState *env, uint64_t val, uint64_t shiftop) 646 { 647 int8_t shift = (int8_t)shiftop; 648 if (shift < 0) { 649 val = (val + (1 << (-1 - shift))) >> -shift; 650 } else { \ 651 uint64_t tmp = val; 652 val <<= shift; 653 if ((val >> shift) != tmp) { 654 SET_QC(); 655 val = ~0; 656 } 657 } 658 return val; 659 } 660 661 #define NEON_FN(dest, src1, src2) do { \ 662 int8_t tmp; \ 663 tmp = (int8_t)src2; \ 664 if (tmp < 0) { \ 665 dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ 666 } else { \ 667 dest = src1 << tmp; \ 668 if ((dest >> tmp) != src1) { \ 669 SET_QC(); \ 670 dest = src1 >> 31; \ 671 } \ 672 }} while (0) 673 NEON_VOP_ENV(qrshl_s8, neon_s8, 4) 674 NEON_VOP_ENV(qrshl_s16, neon_s16, 2) 675 NEON_VOP_ENV(qrshl_s32, neon_s32, 1) 676 #undef NEON_FN 677 678 uint64_t HELPER(neon_qrshl_s64)(CPUState *env, uint64_t valop, uint64_t shiftop) 679 { 680 int8_t shift = (uint8_t)shiftop; 681 int64_t val = valop; 682 683 if (shift < 0) { 684 val = (val + (1 << (-1 - shift))) >> -shift; 685 } else { 686 int64_t tmp = val;; 687 val <<= shift; 688 if ((val >> shift) != tmp) { 689 SET_QC(); 690 val = tmp >> 31; 691 } 692 } 693 return val; 694 } 695 696 uint32_t HELPER(neon_add_u8)(uint32_t a, uint32_t b) 697 { 698 uint32_t mask; 699 mask = (a ^ b) & 0x80808080u; 700 a &= ~0x80808080u; 701 b &= ~0x80808080u; 702 return (a + b) ^ mask; 703 } 704 705 uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b) 706 { 707 uint32_t mask; 708 mask = (a ^ b) & 0x80008000u; 709 a &= ~0x80008000u; 710 b &= ~0x80008000u; 711 return (a + b) ^ mask; 712 } 713 714 #define NEON_FN(dest, src1, src2) dest = src1 + src2 715 NEON_POP(padd_u8, neon_u8, 4) 716 NEON_POP(padd_u16, neon_u16, 2) 717 #undef NEON_FN 718 719 #define NEON_FN(dest, src1, src2) dest = src1 - src2 720 NEON_VOP(sub_u8, neon_u8, 4) 721 NEON_VOP(sub_u16, neon_u16, 2) 722 #undef NEON_FN 723 724 #define NEON_FN(dest, src1, src2) dest = src1 * src2 725 NEON_VOP(mul_u8, neon_u8, 4) 726 NEON_VOP(mul_u16, neon_u16, 2) 727 #undef NEON_FN 728 729 /* Polynomial multiplication is like integer multiplication except the 730 partial products are XORed, not added. */ 731 uint32_t HELPER(neon_mul_p8)(uint32_t op1, uint32_t op2) 732 { 733 uint32_t mask; 734 uint32_t result; 735 result = 0; 736 while (op1) { 737 mask = 0; 738 if (op1 & 1) 739 mask |= 0xff; 740 if (op1 & (1 << 8)) 741 mask |= (0xff << 8); 742 if (op1 & (1 << 16)) 743 mask |= (0xff << 16); 744 if (op1 & (1 << 24)) 745 mask |= (0xff << 24); 746 result ^= op2 & mask; 747 op1 = (op1 >> 1) & 0x7f7f7f7f; 748 op2 = (op2 << 1) & 0xfefefefe; 749 } 750 return result; 751 } 752 753 #define NEON_FN(dest, src1, src2) dest = (src1 & src2) ? -1 : 0 754 NEON_VOP(tst_u8, neon_u8, 4) 755 NEON_VOP(tst_u16, neon_u16, 2) 756 NEON_VOP(tst_u32, neon_u32, 1) 757 #undef NEON_FN 758 759 #define NEON_FN(dest, src1, src2) dest = (src1 == src2) ? -1 : 0 760 NEON_VOP(ceq_u8, neon_u8, 4) 761 NEON_VOP(ceq_u16, neon_u16, 2) 762 NEON_VOP(ceq_u32, neon_u32, 1) 763 #undef NEON_FN 764 765 #define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src 766 NEON_VOP1(abs_s8, neon_s8, 4) 767 NEON_VOP1(abs_s16, neon_s16, 2) 768 #undef NEON_FN 769 770 /* Count Leading Sign/Zero Bits. */ 771 static inline int do_clz8(uint8_t x) 772 { 773 int n; 774 for (n = 8; x; n--) 775 x >>= 1; 776 return n; 777 } 778 779 static inline int do_clz16(uint16_t x) 780 { 781 int n; 782 for (n = 16; x; n--) 783 x >>= 1; 784 return n; 785 } 786 787 #define NEON_FN(dest, src, dummy) dest = do_clz8(src) 788 NEON_VOP1(clz_u8, neon_u8, 4) 789 #undef NEON_FN 790 791 #define NEON_FN(dest, src, dummy) dest = do_clz16(src) 792 NEON_VOP1(clz_u16, neon_u16, 2) 793 #undef NEON_FN 794 795 #define NEON_FN(dest, src, dummy) dest = do_clz8((src < 0) ? ~src : src) - 1 796 NEON_VOP1(cls_s8, neon_s8, 4) 797 #undef NEON_FN 798 799 #define NEON_FN(dest, src, dummy) dest = do_clz16((src < 0) ? ~src : src) - 1 800 NEON_VOP1(cls_s16, neon_s16, 2) 801 #undef NEON_FN 802 803 uint32_t HELPER(neon_cls_s32)(uint32_t x) 804 { 805 int count; 806 if ((int32_t)x < 0) 807 x = ~x; 808 for (count = 32; x; count--) 809 x = x >> 1; 810 return count - 1; 811 } 812 813 /* Bit count. */ 814 uint32_t HELPER(neon_cnt_u8)(uint32_t x) 815 { 816 x = (x & 0x55555555) + ((x >> 1) & 0x55555555); 817 x = (x & 0x33333333) + ((x >> 2) & 0x33333333); 818 x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f); 819 return x; 820 } 821 822 #define NEON_QDMULH16(dest, src1, src2, round) do { \ 823 uint32_t tmp = (int32_t)(int16_t) src1 * (int16_t) src2; \ 824 if ((tmp ^ (tmp << 1)) & SIGNBIT) { \ 825 SET_QC(); \ 826 tmp = (tmp >> 31) ^ ~SIGNBIT; \ 827 } \ 828 tmp <<= 1; \ 829 if (round) { \ 830 int32_t old = tmp; \ 831 tmp += 1 << 15; \ 832 if ((int32_t)tmp < old) { \ 833 SET_QC(); \ 834 tmp = SIGNBIT - 1; \ 835 } \ 836 } \ 837 dest = tmp >> 16; \ 838 } while(0) 839 #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 0) 840 NEON_VOP_ENV(qdmulh_s16, neon_s16, 2) 841 #undef NEON_FN 842 #define NEON_FN(dest, src1, src2) NEON_QDMULH16(dest, src1, src2, 1) 843 NEON_VOP_ENV(qrdmulh_s16, neon_s16, 2) 844 #undef NEON_FN 845 #undef NEON_QDMULH16 846 847 #define NEON_QDMULH32(dest, src1, src2, round) do { \ 848 uint64_t tmp = (int64_t)(int32_t) src1 * (int32_t) src2; \ 849 if ((tmp ^ (tmp << 1)) & SIGNBIT64) { \ 850 SET_QC(); \ 851 tmp = (tmp >> 63) ^ ~SIGNBIT64; \ 852 } else { \ 853 tmp <<= 1; \ 854 } \ 855 if (round) { \ 856 int64_t old = tmp; \ 857 tmp += (int64_t)1 << 31; \ 858 if ((int64_t)tmp < old) { \ 859 SET_QC(); \ 860 tmp = SIGNBIT64 - 1; \ 861 } \ 862 } \ 863 dest = tmp >> 32; \ 864 } while(0) 865 #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 0) 866 NEON_VOP_ENV(qdmulh_s32, neon_s32, 1) 867 #undef NEON_FN 868 #define NEON_FN(dest, src1, src2) NEON_QDMULH32(dest, src1, src2, 1) 869 NEON_VOP_ENV(qrdmulh_s32, neon_s32, 1) 870 #undef NEON_FN 871 #undef NEON_QDMULH32 872 873 uint32_t HELPER(neon_narrow_u8)(uint64_t x) 874 { 875 return (x & 0xffu) | ((x >> 8) & 0xff00u) | ((x >> 16) & 0xff0000u) 876 | ((x >> 24) & 0xff000000u); 877 } 878 879 uint32_t HELPER(neon_narrow_u16)(uint64_t x) 880 { 881 return (x & 0xffffu) | ((x >> 16) & 0xffff0000u); 882 } 883 884 uint32_t HELPER(neon_narrow_high_u8)(uint64_t x) 885 { 886 return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) 887 | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); 888 } 889 890 uint32_t HELPER(neon_narrow_high_u16)(uint64_t x) 891 { 892 return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); 893 } 894 895 uint32_t HELPER(neon_narrow_round_high_u8)(uint64_t x) 896 { 897 x &= 0xff80ff80ff80ff80ull; 898 x += 0x0080008000800080ull; 899 return ((x >> 8) & 0xff) | ((x >> 16) & 0xff00) 900 | ((x >> 24) & 0xff0000) | ((x >> 32) & 0xff000000); 901 } 902 903 uint32_t HELPER(neon_narrow_round_high_u16)(uint64_t x) 904 { 905 x &= 0xffff8000ffff8000ull; 906 x += 0x0000800000008000ull; 907 return ((x >> 16) & 0xffff) | ((x >> 32) & 0xffff0000); 908 } 909 910 uint32_t HELPER(neon_narrow_sat_u8)(CPUState *env, uint64_t x) 911 { 912 uint16_t s; 913 uint8_t d; 914 uint32_t res = 0; 915 #define SAT8(n) \ 916 s = x >> n; \ 917 if (s > 0xff) { \ 918 d = 0xff; \ 919 SET_QC(); \ 920 } else { \ 921 d = s; \ 922 } \ 923 res |= (uint32_t)d << (n / 2); 924 925 SAT8(0); 926 SAT8(16); 927 SAT8(32); 928 SAT8(48); 929 #undef SAT8 930 return res; 931 } 932 933 uint32_t HELPER(neon_narrow_sat_s8)(CPUState *env, uint64_t x) 934 { 935 int16_t s; 936 uint8_t d; 937 uint32_t res = 0; 938 #define SAT8(n) \ 939 s = x >> n; \ 940 if (s != (int8_t)s) { \ 941 d = (s >> 15) ^ 0x7f; \ 942 SET_QC(); \ 943 } else { \ 944 d = s; \ 945 } \ 946 res |= (uint32_t)d << (n / 2); 947 948 SAT8(0); 949 SAT8(16); 950 SAT8(32); 951 SAT8(48); 952 #undef SAT8 953 return res; 954 } 955 956 uint32_t HELPER(neon_narrow_sat_u16)(CPUState *env, uint64_t x) 957 { 958 uint32_t high; 959 uint32_t low; 960 low = x; 961 if (low > 0xffff) { 962 low = 0xffff; 963 SET_QC(); 964 } 965 high = x >> 32; 966 if (high > 0xffff) { 967 high = 0xffff; 968 SET_QC(); 969 } 970 return low | (high << 16); 971 } 972 973 uint32_t HELPER(neon_narrow_sat_s16)(CPUState *env, uint64_t x) 974 { 975 int32_t low; 976 int32_t high; 977 low = x; 978 if (low != (int16_t)low) { 979 low = (low >> 31) ^ 0x7fff; 980 SET_QC(); 981 } 982 high = x >> 32; 983 if (high != (int16_t)high) { 984 high = (high >> 31) ^ 0x7fff; 985 SET_QC(); 986 } 987 return (uint16_t)low | (high << 16); 988 } 989 990 uint32_t HELPER(neon_narrow_sat_u32)(CPUState *env, uint64_t x) 991 { 992 if (x > 0xffffffffu) { 993 SET_QC(); 994 return 0xffffffffu; 995 } 996 return x; 997 } 998 999 uint32_t HELPER(neon_narrow_sat_s32)(CPUState *env, uint64_t x) 1000 { 1001 if ((int64_t)x != (int32_t)x) { 1002 SET_QC(); 1003 return (x >> 63) ^ 0x7fffffff; 1004 } 1005 return x; 1006 } 1007 1008 uint64_t HELPER(neon_widen_u8)(uint32_t x) 1009 { 1010 uint64_t tmp; 1011 uint64_t ret; 1012 ret = (uint8_t)x; 1013 tmp = (uint8_t)(x >> 8); 1014 ret |= tmp << 16; 1015 tmp = (uint8_t)(x >> 16); 1016 ret |= tmp << 32; 1017 tmp = (uint8_t)(x >> 24); 1018 ret |= tmp << 48; 1019 return ret; 1020 } 1021 1022 uint64_t HELPER(neon_widen_s8)(uint32_t x) 1023 { 1024 uint64_t tmp; 1025 uint64_t ret; 1026 ret = (uint16_t)(int8_t)x; 1027 tmp = (uint16_t)(int8_t)(x >> 8); 1028 ret |= tmp << 16; 1029 tmp = (uint16_t)(int8_t)(x >> 16); 1030 ret |= tmp << 32; 1031 tmp = (uint16_t)(int8_t)(x >> 24); 1032 ret |= tmp << 48; 1033 return ret; 1034 } 1035 1036 uint64_t HELPER(neon_widen_u16)(uint32_t x) 1037 { 1038 uint64_t high = (uint16_t)(x >> 16); 1039 return ((uint16_t)x) | (high << 32); 1040 } 1041 1042 uint64_t HELPER(neon_widen_s16)(uint32_t x) 1043 { 1044 uint64_t high = (int16_t)(x >> 16); 1045 return ((uint32_t)(int16_t)x) | (high << 32); 1046 } 1047 1048 uint64_t HELPER(neon_addl_u16)(uint64_t a, uint64_t b) 1049 { 1050 uint64_t mask; 1051 mask = (a ^ b) & 0x8000800080008000ull; 1052 a &= ~0x8000800080008000ull; 1053 b &= ~0x8000800080008000ull; 1054 return (a + b) ^ mask; 1055 } 1056 1057 uint64_t HELPER(neon_addl_u32)(uint64_t a, uint64_t b) 1058 { 1059 uint64_t mask; 1060 mask = (a ^ b) & 0x8000000080000000ull; 1061 a &= ~0x8000000080000000ull; 1062 b &= ~0x8000000080000000ull; 1063 return (a + b) ^ mask; 1064 } 1065 1066 uint64_t HELPER(neon_paddl_u16)(uint64_t a, uint64_t b) 1067 { 1068 uint64_t tmp; 1069 uint64_t tmp2; 1070 1071 tmp = a & 0x0000ffff0000ffffull; 1072 tmp += (a >> 16) & 0x0000ffff0000ffffull; 1073 tmp2 = b & 0xffff0000ffff0000ull; 1074 tmp2 += (b << 16) & 0xffff0000ffff0000ull; 1075 return ( tmp & 0xffff) 1076 | ((tmp >> 16) & 0xffff0000ull) 1077 | ((tmp2 << 16) & 0xffff00000000ull) 1078 | ( tmp2 & 0xffff000000000000ull); 1079 } 1080 1081 uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b) 1082 { 1083 uint32_t low = a + (a >> 32); 1084 uint32_t high = b + (b >> 32); 1085 return low + ((uint64_t)high << 32); 1086 } 1087 1088 uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b) 1089 { 1090 uint64_t mask; 1091 mask = (a ^ ~b) & 0x8000800080008000ull; 1092 a |= 0x8000800080008000ull; 1093 b &= ~0x8000800080008000ull; 1094 return (a - b) ^ mask; 1095 } 1096 1097 uint64_t HELPER(neon_subl_u32)(uint64_t a, uint64_t b) 1098 { 1099 uint64_t mask; 1100 mask = (a ^ ~b) & 0x8000000080000000ull; 1101 a |= 0x8000000080000000ull; 1102 b &= ~0x8000000080000000ull; 1103 return (a - b) ^ mask; 1104 } 1105 1106 uint64_t HELPER(neon_addl_saturate_s32)(CPUState *env, uint64_t a, uint64_t b) 1107 { 1108 uint32_t x, y; 1109 uint32_t low, high; 1110 1111 x = a; 1112 y = b; 1113 low = x + y; 1114 if (((low ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) { 1115 SET_QC(); 1116 low = ((int32_t)x >> 31) ^ ~SIGNBIT; 1117 } 1118 x = a >> 32; 1119 y = b >> 32; 1120 high = x + y; 1121 if (((high ^ x) & SIGNBIT) && !((x ^ y) & SIGNBIT)) { 1122 SET_QC(); 1123 high = ((int32_t)x >> 31) ^ ~SIGNBIT; 1124 } 1125 return low | ((uint64_t)high << 32); 1126 } 1127 1128 uint64_t HELPER(neon_addl_saturate_s64)(CPUState *env, uint64_t a, uint64_t b) 1129 { 1130 uint64_t result; 1131 1132 result = a + b; 1133 if (((result ^ a) & SIGNBIT64) && !((a ^ b) & SIGNBIT64)) { 1134 SET_QC(); 1135 result = ((int64_t)a >> 63) ^ ~SIGNBIT64; 1136 } 1137 return result; 1138 } 1139 1140 #define DO_ABD(dest, x, y, type) do { \ 1141 type tmp_x = x; \ 1142 type tmp_y = y; \ 1143 dest = ((tmp_x > tmp_y) ? tmp_x - tmp_y : tmp_y - tmp_x); \ 1144 } while(0) 1145 1146 uint64_t HELPER(neon_abdl_u16)(uint32_t a, uint32_t b) 1147 { 1148 uint64_t tmp; 1149 uint64_t result; 1150 DO_ABD(result, a, b, uint8_t); 1151 DO_ABD(tmp, a >> 8, b >> 8, uint8_t); 1152 result |= tmp << 16; 1153 DO_ABD(tmp, a >> 16, b >> 16, uint8_t); 1154 result |= tmp << 32; 1155 DO_ABD(tmp, a >> 24, b >> 24, uint8_t); 1156 result |= tmp << 48; 1157 return result; 1158 } 1159 1160 uint64_t HELPER(neon_abdl_s16)(uint32_t a, uint32_t b) 1161 { 1162 uint64_t tmp; 1163 uint64_t result; 1164 DO_ABD(result, a, b, int8_t); 1165 DO_ABD(tmp, a >> 8, b >> 8, int8_t); 1166 result |= tmp << 16; 1167 DO_ABD(tmp, a >> 16, b >> 16, int8_t); 1168 result |= tmp << 32; 1169 DO_ABD(tmp, a >> 24, b >> 24, int8_t); 1170 result |= tmp << 48; 1171 return result; 1172 } 1173 1174 uint64_t HELPER(neon_abdl_u32)(uint32_t a, uint32_t b) 1175 { 1176 uint64_t tmp; 1177 uint64_t result; 1178 DO_ABD(result, a, b, uint16_t); 1179 DO_ABD(tmp, a >> 16, b >> 16, uint16_t); 1180 return result | (tmp << 32); 1181 } 1182 1183 uint64_t HELPER(neon_abdl_s32)(uint32_t a, uint32_t b) 1184 { 1185 uint64_t tmp; 1186 uint64_t result; 1187 DO_ABD(result, a, b, int16_t); 1188 DO_ABD(tmp, a >> 16, b >> 16, int16_t); 1189 return result | (tmp << 32); 1190 } 1191 1192 uint64_t HELPER(neon_abdl_u64)(uint32_t a, uint32_t b) 1193 { 1194 uint64_t result; 1195 DO_ABD(result, a, b, uint32_t); 1196 return result; 1197 } 1198 1199 uint64_t HELPER(neon_abdl_s64)(uint32_t a, uint32_t b) 1200 { 1201 uint64_t result; 1202 DO_ABD(result, a, b, int32_t); 1203 return result; 1204 } 1205 #undef DO_ABD 1206 1207 /* Widening multiply. Named type is the source type. */ 1208 #define DO_MULL(dest, x, y, type1, type2) do { \ 1209 type1 tmp_x = x; \ 1210 type1 tmp_y = y; \ 1211 dest = (type2)((type2)tmp_x * (type2)tmp_y); \ 1212 } while(0) 1213 1214 uint64_t HELPER(neon_mull_u8)(uint32_t a, uint32_t b) 1215 { 1216 uint64_t tmp; 1217 uint64_t result; 1218 1219 DO_MULL(result, a, b, uint8_t, uint16_t); 1220 DO_MULL(tmp, a >> 8, b >> 8, uint8_t, uint16_t); 1221 result |= tmp << 16; 1222 DO_MULL(tmp, a >> 16, b >> 16, uint8_t, uint16_t); 1223 result |= tmp << 32; 1224 DO_MULL(tmp, a >> 24, b >> 24, uint8_t, uint16_t); 1225 result |= tmp << 48; 1226 return result; 1227 } 1228 1229 uint64_t HELPER(neon_mull_s8)(uint32_t a, uint32_t b) 1230 { 1231 uint64_t tmp; 1232 uint64_t result; 1233 1234 DO_MULL(result, a, b, int8_t, uint16_t); 1235 DO_MULL(tmp, a >> 8, b >> 8, int8_t, uint16_t); 1236 result |= tmp << 16; 1237 DO_MULL(tmp, a >> 16, b >> 16, int8_t, uint16_t); 1238 result |= tmp << 32; 1239 DO_MULL(tmp, a >> 24, b >> 24, int8_t, uint16_t); 1240 result |= tmp << 48; 1241 return result; 1242 } 1243 1244 uint64_t HELPER(neon_mull_u16)(uint32_t a, uint32_t b) 1245 { 1246 uint64_t tmp; 1247 uint64_t result; 1248 1249 DO_MULL(result, a, b, uint16_t, uint32_t); 1250 DO_MULL(tmp, a >> 16, b >> 16, uint16_t, uint32_t); 1251 return result | (tmp << 32); 1252 } 1253 1254 uint64_t HELPER(neon_mull_s16)(uint32_t a, uint32_t b) 1255 { 1256 uint64_t tmp; 1257 uint64_t result; 1258 1259 DO_MULL(result, a, b, int16_t, uint32_t); 1260 DO_MULL(tmp, a >> 16, b >> 16, int16_t, uint32_t); 1261 return result | (tmp << 32); 1262 } 1263 1264 uint64_t HELPER(neon_negl_u16)(uint64_t x) 1265 { 1266 uint16_t tmp; 1267 uint64_t result; 1268 result = (uint16_t)-x; 1269 tmp = -(x >> 16); 1270 result |= (uint64_t)tmp << 16; 1271 tmp = -(x >> 32); 1272 result |= (uint64_t)tmp << 32; 1273 tmp = -(x >> 48); 1274 result |= (uint64_t)tmp << 48; 1275 return result; 1276 } 1277 1278 #include <stdio.h> 1279 uint64_t HELPER(neon_negl_u32)(uint64_t x) 1280 { 1281 uint32_t low = -x; 1282 uint32_t high = -(x >> 32); 1283 return low | ((uint64_t)high << 32); 1284 } 1285 1286 /* FIXME: There should be a native op for this. */ 1287 uint64_t HELPER(neon_negl_u64)(uint64_t x) 1288 { 1289 return -x; 1290 } 1291 1292 /* Saturnating sign manuipulation. */ 1293 /* ??? Make these use NEON_VOP1 */ 1294 #define DO_QABS8(x) do { \ 1295 if (x == (int8_t)0x80) { \ 1296 x = 0x7f; \ 1297 SET_QC(); \ 1298 } else if (x < 0) { \ 1299 x = -x; \ 1300 }} while (0) 1301 uint32_t HELPER(neon_qabs_s8)(CPUState *env, uint32_t x) 1302 { 1303 neon_s8 vec; 1304 NEON_UNPACK(neon_s8, vec, x); 1305 DO_QABS8(vec.v1); 1306 DO_QABS8(vec.v2); 1307 DO_QABS8(vec.v3); 1308 DO_QABS8(vec.v4); 1309 NEON_PACK(neon_s8, x, vec); 1310 return x; 1311 } 1312 #undef DO_QABS8 1313 1314 #define DO_QNEG8(x) do { \ 1315 if (x == (int8_t)0x80) { \ 1316 x = 0x7f; \ 1317 SET_QC(); \ 1318 } else { \ 1319 x = -x; \ 1320 }} while (0) 1321 uint32_t HELPER(neon_qneg_s8)(CPUState *env, uint32_t x) 1322 { 1323 neon_s8 vec; 1324 NEON_UNPACK(neon_s8, vec, x); 1325 DO_QNEG8(vec.v1); 1326 DO_QNEG8(vec.v2); 1327 DO_QNEG8(vec.v3); 1328 DO_QNEG8(vec.v4); 1329 NEON_PACK(neon_s8, x, vec); 1330 return x; 1331 } 1332 #undef DO_QNEG8 1333 1334 #define DO_QABS16(x) do { \ 1335 if (x == (int16_t)0x8000) { \ 1336 x = 0x7fff; \ 1337 SET_QC(); \ 1338 } else if (x < 0) { \ 1339 x = -x; \ 1340 }} while (0) 1341 uint32_t HELPER(neon_qabs_s16)(CPUState *env, uint32_t x) 1342 { 1343 neon_s16 vec; 1344 NEON_UNPACK(neon_s16, vec, x); 1345 DO_QABS16(vec.v1); 1346 DO_QABS16(vec.v2); 1347 NEON_PACK(neon_s16, x, vec); 1348 return x; 1349 } 1350 #undef DO_QABS16 1351 1352 #define DO_QNEG16(x) do { \ 1353 if (x == (int16_t)0x8000) { \ 1354 x = 0x7fff; \ 1355 SET_QC(); \ 1356 } else { \ 1357 x = -x; \ 1358 }} while (0) 1359 uint32_t HELPER(neon_qneg_s16)(CPUState *env, uint32_t x) 1360 { 1361 neon_s16 vec; 1362 NEON_UNPACK(neon_s16, vec, x); 1363 DO_QNEG16(vec.v1); 1364 DO_QNEG16(vec.v2); 1365 NEON_PACK(neon_s16, x, vec); 1366 return x; 1367 } 1368 #undef DO_QNEG16 1369 1370 uint32_t HELPER(neon_qabs_s32)(CPUState *env, uint32_t x) 1371 { 1372 if (x == SIGNBIT) { 1373 SET_QC(); 1374 x = ~SIGNBIT; 1375 } else if ((int32_t)x < 0) { 1376 x = -x; 1377 } 1378 return x; 1379 } 1380 1381 uint32_t HELPER(neon_qneg_s32)(CPUState *env, uint32_t x) 1382 { 1383 if (x == SIGNBIT) { 1384 SET_QC(); 1385 x = ~SIGNBIT; 1386 } else { 1387 x = -x; 1388 } 1389 return x; 1390 } 1391 1392 /* NEON Float helpers. */ 1393 uint32_t HELPER(neon_min_f32)(uint32_t a, uint32_t b) 1394 { 1395 float32 f0 = vfp_itos(a); 1396 float32 f1 = vfp_itos(b); 1397 return (float32_compare_quiet(f0, f1, NFS) == -1) ? a : b; 1398 } 1399 1400 uint32_t HELPER(neon_max_f32)(uint32_t a, uint32_t b) 1401 { 1402 float32 f0 = vfp_itos(a); 1403 float32 f1 = vfp_itos(b); 1404 return (float32_compare_quiet(f0, f1, NFS) == 1) ? a : b; 1405 } 1406 1407 uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b) 1408 { 1409 float32 f0 = vfp_itos(a); 1410 float32 f1 = vfp_itos(b); 1411 return vfp_stoi((float32_compare_quiet(f0, f1, NFS) == 1) 1412 ? float32_sub(f0, f1, NFS) 1413 : float32_sub(f1, f0, NFS)); 1414 } 1415 1416 uint32_t HELPER(neon_add_f32)(uint32_t a, uint32_t b) 1417 { 1418 return vfp_stoi(float32_add(vfp_itos(a), vfp_itos(b), NFS)); 1419 } 1420 1421 uint32_t HELPER(neon_sub_f32)(uint32_t a, uint32_t b) 1422 { 1423 return vfp_stoi(float32_sub(vfp_itos(a), vfp_itos(b), NFS)); 1424 } 1425 1426 uint32_t HELPER(neon_mul_f32)(uint32_t a, uint32_t b) 1427 { 1428 return vfp_stoi(float32_mul(vfp_itos(a), vfp_itos(b), NFS)); 1429 } 1430 1431 /* Floating point comparisons produce an integer result. */ 1432 #define NEON_VOP_FCMP(name, cmp) \ 1433 uint32_t HELPER(neon_##name)(uint32_t a, uint32_t b) \ 1434 { \ 1435 if (float32_compare_quiet(vfp_itos(a), vfp_itos(b), NFS) cmp 0) \ 1436 return ~0; \ 1437 else \ 1438 return 0; \ 1439 } 1440 1441 NEON_VOP_FCMP(ceq_f32, ==) 1442 NEON_VOP_FCMP(cge_f32, >=) 1443 NEON_VOP_FCMP(cgt_f32, >) 1444 1445 uint32_t HELPER(neon_acge_f32)(uint32_t a, uint32_t b) 1446 { 1447 float32 f0 = float32_abs(vfp_itos(a)); 1448 float32 f1 = float32_abs(vfp_itos(b)); 1449 return (float32_compare_quiet(f0, f1,NFS) >= 0) ? ~0 : 0; 1450 } 1451 1452 uint32_t HELPER(neon_acgt_f32)(uint32_t a, uint32_t b) 1453 { 1454 float32 f0 = float32_abs(vfp_itos(a)); 1455 float32 f1 = float32_abs(vfp_itos(b)); 1456 return (float32_compare_quiet(f0, f1, NFS) > 0) ? ~0 : 0; 1457 } 1458