1 /* 2 * jsimd_arm.c 3 * 4 * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB 5 * Copyright 2009-2011, 2013-2014 D. R. Commander 6 * 7 * Based on the x86 SIMD extension for IJG JPEG library, 8 * Copyright (C) 1999-2006, MIYASAKA Masaru. 9 * For conditions of distribution and use, see copyright notice in jsimdext.inc 10 * 11 * This file contains the interface between the "normal" portions 12 * of the library and the SIMD implementations when running on a 13 * 32-bit ARM architecture. 14 */ 15 16 #define JPEG_INTERNALS 17 #include "../jinclude.h" 18 #include "../jpeglib.h" 19 #include "../jsimd.h" 20 #include "../jdct.h" 21 #include "../jsimddct.h" 22 #include "jsimd.h" 23 24 #include <stdio.h> 25 #include <string.h> 26 #include <ctype.h> 27 28 static unsigned int simd_support = ~0; 29 30 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) 31 32 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) 33 34 LOCAL(int) 35 check_feature (char *buffer, char *feature) 36 { 37 char *p; 38 if (*feature == 0) 39 return 0; 40 if (strncmp(buffer, "Features", 8) != 0) 41 return 0; 42 buffer += 8; 43 while (isspace(*buffer)) 44 buffer++; 45 46 /* Check if 'feature' is present in the buffer as a separate word */ 47 while ((p = strstr(buffer, feature))) { 48 if (p > buffer && !isspace(*(p - 1))) { 49 buffer++; 50 continue; 51 } 52 p += strlen(feature); 53 if (*p != 0 && !isspace(*p)) { 54 buffer++; 55 continue; 56 } 57 return 1; 58 } 59 return 0; 60 } 61 62 LOCAL(int) 63 parse_proc_cpuinfo (int bufsize) 64 { 65 char *buffer = (char *)malloc(bufsize); 66 FILE *fd; 67 simd_support = 0; 68 69 if (!buffer) 70 return 0; 71 72 fd = fopen("/proc/cpuinfo", "r"); 73 if (fd) { 74 while (fgets(buffer, bufsize, fd)) { 75 if (!strchr(buffer, '\n') && !feof(fd)) { 76 /* "impossible" happened - insufficient size of the buffer! */ 77 fclose(fd); 78 free(buffer); 79 return 0; 80 } 81 if (check_feature(buffer, "neon")) 82 simd_support |= JSIMD_ARM_NEON; 83 } 84 fclose(fd); 85 } 86 free(buffer); 87 return 1; 88 } 89 90 #endif 91 92 /* 93 * Check what SIMD accelerations are supported. 94 * 95 * FIXME: This code is racy under a multi-threaded environment. 96 */ 97 LOCAL(void) 98 init_simd (void) 99 { 100 char *env = NULL; 101 #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) 102 int bufsize = 1024; /* an initial guess for the line buffer size limit */ 103 #endif 104 105 if (simd_support != ~0U) 106 return; 107 108 simd_support = 0; 109 110 #if defined(__ARM_NEON__) 111 simd_support |= JSIMD_ARM_NEON; 112 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) 113 /* We still have a chance to use NEON regardless of globally used 114 * -mcpu/-mfpu options passed to gcc by performing runtime detection via 115 * /proc/cpuinfo parsing on linux/android */ 116 while (!parse_proc_cpuinfo(bufsize)) { 117 bufsize *= 2; 118 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT) 119 break; 120 } 121 #endif 122 123 /* Force different settings through environment variables */ 124 env = getenv("JSIMD_FORCENEON"); 125 if ((env != NULL) && (strcmp(env, "1") == 0)) 126 simd_support &= JSIMD_ARM_NEON; 127 env = getenv("JSIMD_FORCENONE"); 128 if ((env != NULL) && (strcmp(env, "1") == 0)) 129 simd_support = 0; 130 } 131 132 GLOBAL(int) 133 jsimd_can_rgb_ycc (void) 134 { 135 init_simd(); 136 137 /* The code is optimised for these values only */ 138 if (BITS_IN_JSAMPLE != 8) 139 return 0; 140 if (sizeof(JDIMENSION) != 4) 141 return 0; 142 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 143 return 0; 144 145 if (simd_support & JSIMD_ARM_NEON) 146 return 1; 147 148 return 0; 149 } 150 151 GLOBAL(int) 152 jsimd_can_rgb_gray (void) 153 { 154 init_simd(); 155 156 return 0; 157 } 158 159 GLOBAL(int) 160 jsimd_can_ycc_rgb (void) 161 { 162 init_simd(); 163 164 /* The code is optimised for these values only */ 165 if (BITS_IN_JSAMPLE != 8) 166 return 0; 167 if (sizeof(JDIMENSION) != 4) 168 return 0; 169 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 170 return 0; 171 172 if (simd_support & JSIMD_ARM_NEON) 173 return 1; 174 175 return 0; 176 } 177 178 GLOBAL(int) 179 jsimd_can_ycc_rgb565 (void) 180 { 181 init_simd(); 182 183 /* The code is optimised for these values only */ 184 if (BITS_IN_JSAMPLE != 8) 185 return 0; 186 if (sizeof(JDIMENSION) != 4) 187 return 0; 188 189 if (simd_support & JSIMD_ARM_NEON) 190 return 1; 191 192 return 0; 193 } 194 195 GLOBAL(void) 196 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, 197 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 198 JDIMENSION output_row, int num_rows) 199 { 200 void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 201 202 switch(cinfo->in_color_space) { 203 case JCS_EXT_RGB: 204 neonfct=jsimd_extrgb_ycc_convert_neon; 205 break; 206 case JCS_EXT_RGBX: 207 case JCS_EXT_RGBA: 208 neonfct=jsimd_extrgbx_ycc_convert_neon; 209 break; 210 case JCS_EXT_BGR: 211 neonfct=jsimd_extbgr_ycc_convert_neon; 212 break; 213 case JCS_EXT_BGRX: 214 case JCS_EXT_BGRA: 215 neonfct=jsimd_extbgrx_ycc_convert_neon; 216 break; 217 case JCS_EXT_XBGR: 218 case JCS_EXT_ABGR: 219 neonfct=jsimd_extxbgr_ycc_convert_neon; 220 break; 221 case JCS_EXT_XRGB: 222 case JCS_EXT_ARGB: 223 neonfct=jsimd_extxrgb_ycc_convert_neon; 224 break; 225 default: 226 neonfct=jsimd_extrgb_ycc_convert_neon; 227 break; 228 } 229 230 if (simd_support & JSIMD_ARM_NEON) 231 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 232 } 233 234 GLOBAL(void) 235 jsimd_rgb_gray_convert (j_compress_ptr cinfo, 236 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 237 JDIMENSION output_row, int num_rows) 238 { 239 } 240 241 GLOBAL(void) 242 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, 243 JSAMPIMAGE input_buf, JDIMENSION input_row, 244 JSAMPARRAY output_buf, int num_rows) 245 { 246 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 247 248 switch(cinfo->out_color_space) { 249 case JCS_EXT_RGB: 250 neonfct=jsimd_ycc_extrgb_convert_neon; 251 break; 252 case JCS_EXT_RGBX: 253 case JCS_EXT_RGBA: 254 neonfct=jsimd_ycc_extrgbx_convert_neon; 255 break; 256 case JCS_EXT_BGR: 257 neonfct=jsimd_ycc_extbgr_convert_neon; 258 break; 259 case JCS_EXT_BGRX: 260 case JCS_EXT_BGRA: 261 neonfct=jsimd_ycc_extbgrx_convert_neon; 262 break; 263 case JCS_EXT_XBGR: 264 case JCS_EXT_ABGR: 265 neonfct=jsimd_ycc_extxbgr_convert_neon; 266 break; 267 case JCS_EXT_XRGB: 268 case JCS_EXT_ARGB: 269 neonfct=jsimd_ycc_extxrgb_convert_neon; 270 break; 271 default: 272 neonfct=jsimd_ycc_extrgb_convert_neon; 273 break; 274 } 275 276 if (simd_support & JSIMD_ARM_NEON) 277 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); 278 } 279 280 GLOBAL(void) 281 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, 282 JSAMPIMAGE input_buf, JDIMENSION input_row, 283 JSAMPARRAY output_buf, int num_rows) 284 { 285 if (simd_support & JSIMD_ARM_NEON) 286 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, 287 output_buf, num_rows); 288 } 289 290 GLOBAL(int) 291 jsimd_can_h2v2_downsample (void) 292 { 293 init_simd(); 294 295 return 0; 296 } 297 298 GLOBAL(int) 299 jsimd_can_h2v1_downsample (void) 300 { 301 init_simd(); 302 303 return 0; 304 } 305 306 GLOBAL(void) 307 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 308 JSAMPARRAY input_data, JSAMPARRAY output_data) 309 { 310 } 311 312 GLOBAL(void) 313 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 314 JSAMPARRAY input_data, JSAMPARRAY output_data) 315 { 316 } 317 318 GLOBAL(int) 319 jsimd_can_h2v2_upsample (void) 320 { 321 init_simd(); 322 323 return 0; 324 } 325 326 GLOBAL(int) 327 jsimd_can_h2v1_upsample (void) 328 { 329 init_simd(); 330 331 return 0; 332 } 333 334 GLOBAL(void) 335 jsimd_h2v2_upsample (j_decompress_ptr cinfo, 336 jpeg_component_info * compptr, 337 JSAMPARRAY input_data, 338 JSAMPARRAY * output_data_ptr) 339 { 340 } 341 342 GLOBAL(void) 343 jsimd_h2v1_upsample (j_decompress_ptr cinfo, 344 jpeg_component_info * compptr, 345 JSAMPARRAY input_data, 346 JSAMPARRAY * output_data_ptr) 347 { 348 } 349 350 GLOBAL(int) 351 jsimd_can_h2v2_fancy_upsample (void) 352 { 353 init_simd(); 354 355 return 0; 356 } 357 358 GLOBAL(int) 359 jsimd_can_h2v1_fancy_upsample (void) 360 { 361 init_simd(); 362 363 /* The code is optimised for these values only */ 364 if (BITS_IN_JSAMPLE != 8) 365 return 0; 366 if (sizeof(JDIMENSION) != 4) 367 return 0; 368 369 if (simd_support & JSIMD_ARM_NEON) 370 return 1; 371 372 return 0; 373 } 374 375 GLOBAL(void) 376 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, 377 jpeg_component_info * compptr, 378 JSAMPARRAY input_data, 379 JSAMPARRAY * output_data_ptr) 380 { 381 } 382 383 GLOBAL(void) 384 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, 385 jpeg_component_info * compptr, 386 JSAMPARRAY input_data, 387 JSAMPARRAY * output_data_ptr) 388 { 389 if (simd_support & JSIMD_ARM_NEON) 390 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor, 391 compptr->downsampled_width, input_data, 392 output_data_ptr); 393 } 394 395 GLOBAL(int) 396 jsimd_can_h2v2_merged_upsample (void) 397 { 398 init_simd(); 399 400 return 0; 401 } 402 403 GLOBAL(int) 404 jsimd_can_h2v1_merged_upsample (void) 405 { 406 init_simd(); 407 408 return 0; 409 } 410 411 GLOBAL(void) 412 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, 413 JSAMPIMAGE input_buf, 414 JDIMENSION in_row_group_ctr, 415 JSAMPARRAY output_buf) 416 { 417 } 418 419 GLOBAL(void) 420 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, 421 JSAMPIMAGE input_buf, 422 JDIMENSION in_row_group_ctr, 423 JSAMPARRAY output_buf) 424 { 425 } 426 427 GLOBAL(int) 428 jsimd_can_convsamp (void) 429 { 430 init_simd(); 431 432 /* The code is optimised for these values only */ 433 if (DCTSIZE != 8) 434 return 0; 435 if (BITS_IN_JSAMPLE != 8) 436 return 0; 437 if (sizeof(JDIMENSION) != 4) 438 return 0; 439 if (sizeof(DCTELEM) != 2) 440 return 0; 441 442 if (simd_support & JSIMD_ARM_NEON) 443 return 1; 444 445 return 0; 446 } 447 448 GLOBAL(int) 449 jsimd_can_convsamp_float (void) 450 { 451 init_simd(); 452 453 return 0; 454 } 455 456 GLOBAL(void) 457 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, 458 DCTELEM * workspace) 459 { 460 if (simd_support & JSIMD_ARM_NEON) 461 jsimd_convsamp_neon(sample_data, start_col, workspace); 462 } 463 464 GLOBAL(void) 465 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, 466 FAST_FLOAT * workspace) 467 { 468 } 469 470 GLOBAL(int) 471 jsimd_can_fdct_islow (void) 472 { 473 init_simd(); 474 475 return 0; 476 } 477 478 GLOBAL(int) 479 jsimd_can_fdct_ifast (void) 480 { 481 init_simd(); 482 483 /* The code is optimised for these values only */ 484 if (DCTSIZE != 8) 485 return 0; 486 if (sizeof(DCTELEM) != 2) 487 return 0; 488 489 if (simd_support & JSIMD_ARM_NEON) 490 return 1; 491 492 return 0; 493 } 494 495 GLOBAL(int) 496 jsimd_can_fdct_float (void) 497 { 498 init_simd(); 499 500 return 0; 501 } 502 503 GLOBAL(void) 504 jsimd_fdct_islow (DCTELEM * data) 505 { 506 } 507 508 GLOBAL(void) 509 jsimd_fdct_ifast (DCTELEM * data) 510 { 511 if (simd_support & JSIMD_ARM_NEON) 512 jsimd_fdct_ifast_neon(data); 513 } 514 515 GLOBAL(void) 516 jsimd_fdct_float (FAST_FLOAT * data) 517 { 518 } 519 520 GLOBAL(int) 521 jsimd_can_quantize (void) 522 { 523 init_simd(); 524 525 /* The code is optimised for these values only */ 526 if (DCTSIZE != 8) 527 return 0; 528 if (sizeof(JCOEF) != 2) 529 return 0; 530 if (sizeof(DCTELEM) != 2) 531 return 0; 532 533 if (simd_support & JSIMD_ARM_NEON) 534 return 1; 535 536 return 0; 537 } 538 539 GLOBAL(int) 540 jsimd_can_quantize_float (void) 541 { 542 init_simd(); 543 544 return 0; 545 } 546 547 GLOBAL(void) 548 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, 549 DCTELEM * workspace) 550 { 551 if (simd_support & JSIMD_ARM_NEON) 552 jsimd_quantize_neon(coef_block, divisors, workspace); 553 } 554 555 GLOBAL(void) 556 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, 557 FAST_FLOAT * workspace) 558 { 559 } 560 561 GLOBAL(int) 562 jsimd_can_idct_2x2 (void) 563 { 564 init_simd(); 565 566 /* The code is optimised for these values only */ 567 if (DCTSIZE != 8) 568 return 0; 569 if (sizeof(JCOEF) != 2) 570 return 0; 571 if (BITS_IN_JSAMPLE != 8) 572 return 0; 573 if (sizeof(JDIMENSION) != 4) 574 return 0; 575 if (sizeof(ISLOW_MULT_TYPE) != 2) 576 return 0; 577 578 if (simd_support & JSIMD_ARM_NEON) 579 return 1; 580 581 return 0; 582 } 583 584 GLOBAL(int) 585 jsimd_can_idct_4x4 (void) 586 { 587 init_simd(); 588 589 /* The code is optimised for these values only */ 590 if (DCTSIZE != 8) 591 return 0; 592 if (sizeof(JCOEF) != 2) 593 return 0; 594 if (BITS_IN_JSAMPLE != 8) 595 return 0; 596 if (sizeof(JDIMENSION) != 4) 597 return 0; 598 if (sizeof(ISLOW_MULT_TYPE) != 2) 599 return 0; 600 601 if (simd_support & JSIMD_ARM_NEON) 602 return 1; 603 604 return 0; 605 } 606 607 GLOBAL(void) 608 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 609 JCOEFPTR coef_block, JSAMPARRAY output_buf, 610 JDIMENSION output_col) 611 { 612 if (simd_support & JSIMD_ARM_NEON) 613 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, 614 output_col); 615 } 616 617 GLOBAL(void) 618 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 619 JCOEFPTR coef_block, JSAMPARRAY output_buf, 620 JDIMENSION output_col) 621 { 622 if (simd_support & JSIMD_ARM_NEON) 623 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, 624 output_col); 625 } 626 627 GLOBAL(int) 628 jsimd_can_idct_islow (void) 629 { 630 init_simd(); 631 632 /* The code is optimised for these values only */ 633 if (DCTSIZE != 8) 634 return 0; 635 if (sizeof(JCOEF) != 2) 636 return 0; 637 if (BITS_IN_JSAMPLE != 8) 638 return 0; 639 if (sizeof(JDIMENSION) != 4) 640 return 0; 641 if (sizeof(ISLOW_MULT_TYPE) != 2) 642 return 0; 643 644 if (simd_support & JSIMD_ARM_NEON) 645 return 1; 646 647 return 0; 648 } 649 650 GLOBAL(int) 651 jsimd_can_idct_ifast (void) 652 { 653 init_simd(); 654 655 /* The code is optimised for these values only */ 656 if (DCTSIZE != 8) 657 return 0; 658 if (sizeof(JCOEF) != 2) 659 return 0; 660 if (BITS_IN_JSAMPLE != 8) 661 return 0; 662 if (sizeof(JDIMENSION) != 4) 663 return 0; 664 if (sizeof(IFAST_MULT_TYPE) != 2) 665 return 0; 666 if (IFAST_SCALE_BITS != 2) 667 return 0; 668 669 if (simd_support & JSIMD_ARM_NEON) 670 return 1; 671 672 return 0; 673 } 674 675 GLOBAL(int) 676 jsimd_can_idct_float (void) 677 { 678 init_simd(); 679 680 return 0; 681 } 682 683 GLOBAL(void) 684 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, 685 JCOEFPTR coef_block, JSAMPARRAY output_buf, 686 JDIMENSION output_col) 687 { 688 if (simd_support & JSIMD_ARM_NEON) 689 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, 690 output_col); 691 } 692 693 GLOBAL(void) 694 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, 695 JCOEFPTR coef_block, JSAMPARRAY output_buf, 696 JDIMENSION output_col) 697 { 698 if (simd_support & JSIMD_ARM_NEON) 699 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, 700 output_col); 701 } 702 703 GLOBAL(void) 704 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, 705 JCOEFPTR coef_block, JSAMPARRAY output_buf, 706 JDIMENSION output_col) 707 { 708 } 709