1 /* 2 * jsimd_arm.c 3 * 4 * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB 5 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies). 6 * Copyright (C) 2009-2011, 2013-2014, 2016, D. R. Commander. 7 * Copyright (C) 2015-2016, Matthieu Darbois. 8 * 9 * Based on the x86 SIMD extension for IJG JPEG library, 10 * Copyright (C) 1999-2006, MIYASAKA Masaru. 11 * For conditions of distribution and use, see copyright notice in jsimdext.inc 12 * 13 * This file contains the interface between the "normal" portions 14 * of the library and the SIMD implementations when running on a 15 * 32-bit ARM architecture. 16 */ 17 18 #define JPEG_INTERNALS 19 #include "../jinclude.h" 20 #include "../jpeglib.h" 21 #include "../jsimd.h" 22 #include "../jdct.h" 23 #include "../jsimddct.h" 24 #include "jsimd.h" 25 26 #include <stdio.h> 27 #include <string.h> 28 #include <ctype.h> 29 30 static unsigned int simd_support = ~0; 31 static unsigned int simd_huffman = 1; 32 33 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) 34 35 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) 36 37 LOCAL(int) 38 check_feature (char *buffer, char *feature) 39 { 40 char *p; 41 if (*feature == 0) 42 return 0; 43 if (strncmp(buffer, "Features", 8) != 0) 44 return 0; 45 buffer += 8; 46 while (isspace(*buffer)) 47 buffer++; 48 49 /* Check if 'feature' is present in the buffer as a separate word */ 50 while ((p = strstr(buffer, feature))) { 51 if (p > buffer && !isspace(*(p - 1))) { 52 buffer++; 53 continue; 54 } 55 p += strlen(feature); 56 if (*p != 0 && !isspace(*p)) { 57 buffer++; 58 continue; 59 } 60 return 1; 61 } 62 return 0; 63 } 64 65 LOCAL(int) 66 parse_proc_cpuinfo (int bufsize) 67 { 68 char *buffer = (char *)malloc(bufsize); 69 FILE *fd; 70 simd_support = 0; 71 72 if (!buffer) 73 return 0; 74 75 fd = fopen("/proc/cpuinfo", "r"); 76 if (fd) { 77 while (fgets(buffer, bufsize, fd)) { 78 if (!strchr(buffer, '\n') && !feof(fd)) { 79 /* "impossible" happened - insufficient size of the buffer! */ 80 fclose(fd); 81 free(buffer); 82 return 0; 83 } 84 if (check_feature(buffer, "neon")) 85 simd_support |= JSIMD_ARM_NEON; 86 } 87 fclose(fd); 88 } 89 free(buffer); 90 return 1; 91 } 92 93 #endif 94 95 /* 96 * Check what SIMD accelerations are supported. 97 * 98 * FIXME: This code is racy under a multi-threaded environment. 99 */ 100 LOCAL(void) 101 init_simd (void) 102 { 103 char *env = NULL; 104 #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) 105 int bufsize = 1024; /* an initial guess for the line buffer size limit */ 106 #endif 107 108 if (simd_support != ~0U) 109 return; 110 111 simd_support = 0; 112 113 #if defined(__ARM_NEON__) 114 simd_support |= JSIMD_ARM_NEON; 115 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) 116 /* We still have a chance to use NEON regardless of globally used 117 * -mcpu/-mfpu options passed to gcc by performing runtime detection via 118 * /proc/cpuinfo parsing on linux/android */ 119 while (!parse_proc_cpuinfo(bufsize)) { 120 bufsize *= 2; 121 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT) 122 break; 123 } 124 #endif 125 126 /* Force different settings through environment variables */ 127 env = getenv("JSIMD_FORCENEON"); 128 if ((env != NULL) && (strcmp(env, "1") == 0)) 129 simd_support = JSIMD_ARM_NEON; 130 env = getenv("JSIMD_FORCENONE"); 131 if ((env != NULL) && (strcmp(env, "1") == 0)) 132 simd_support = 0; 133 env = getenv("JSIMD_NOHUFFENC"); 134 if ((env != NULL) && (strcmp(env, "1") == 0)) 135 simd_huffman = 0; 136 } 137 138 GLOBAL(int) 139 jsimd_can_rgb_ycc (void) 140 { 141 init_simd(); 142 143 /* The code is optimised for these values only */ 144 if (BITS_IN_JSAMPLE != 8) 145 return 0; 146 if (sizeof(JDIMENSION) != 4) 147 return 0; 148 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 149 return 0; 150 151 if (simd_support & JSIMD_ARM_NEON) 152 return 1; 153 154 return 0; 155 } 156 157 GLOBAL(int) 158 jsimd_can_rgb_gray (void) 159 { 160 init_simd(); 161 162 return 0; 163 } 164 165 GLOBAL(int) 166 jsimd_can_ycc_rgb (void) 167 { 168 init_simd(); 169 170 /* The code is optimised for these values only */ 171 if (BITS_IN_JSAMPLE != 8) 172 return 0; 173 if (sizeof(JDIMENSION) != 4) 174 return 0; 175 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 176 return 0; 177 178 if (simd_support & JSIMD_ARM_NEON) 179 return 1; 180 181 return 0; 182 } 183 184 GLOBAL(int) 185 jsimd_can_ycc_rgb565 (void) 186 { 187 init_simd(); 188 189 /* The code is optimised for these values only */ 190 if (BITS_IN_JSAMPLE != 8) 191 return 0; 192 if (sizeof(JDIMENSION) != 4) 193 return 0; 194 195 if (simd_support & JSIMD_ARM_NEON) 196 return 1; 197 198 return 0; 199 } 200 201 GLOBAL(void) 202 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, 203 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 204 JDIMENSION output_row, int num_rows) 205 { 206 void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 207 208 switch(cinfo->in_color_space) { 209 case JCS_EXT_RGB: 210 neonfct=jsimd_extrgb_ycc_convert_neon; 211 break; 212 case JCS_EXT_RGBX: 213 case JCS_EXT_RGBA: 214 neonfct=jsimd_extrgbx_ycc_convert_neon; 215 break; 216 case JCS_EXT_BGR: 217 neonfct=jsimd_extbgr_ycc_convert_neon; 218 break; 219 case JCS_EXT_BGRX: 220 case JCS_EXT_BGRA: 221 neonfct=jsimd_extbgrx_ycc_convert_neon; 222 break; 223 case JCS_EXT_XBGR: 224 case JCS_EXT_ABGR: 225 neonfct=jsimd_extxbgr_ycc_convert_neon; 226 break; 227 case JCS_EXT_XRGB: 228 case JCS_EXT_ARGB: 229 neonfct=jsimd_extxrgb_ycc_convert_neon; 230 break; 231 default: 232 neonfct=jsimd_extrgb_ycc_convert_neon; 233 break; 234 } 235 236 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows); 237 } 238 239 GLOBAL(void) 240 jsimd_rgb_gray_convert (j_compress_ptr cinfo, 241 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 242 JDIMENSION output_row, int num_rows) 243 { 244 } 245 246 GLOBAL(void) 247 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, 248 JSAMPIMAGE input_buf, JDIMENSION input_row, 249 JSAMPARRAY output_buf, int num_rows) 250 { 251 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 252 253 switch(cinfo->out_color_space) { 254 case JCS_EXT_RGB: 255 neonfct=jsimd_ycc_extrgb_convert_neon; 256 break; 257 case JCS_EXT_RGBX: 258 case JCS_EXT_RGBA: 259 neonfct=jsimd_ycc_extrgbx_convert_neon; 260 break; 261 case JCS_EXT_BGR: 262 neonfct=jsimd_ycc_extbgr_convert_neon; 263 break; 264 case JCS_EXT_BGRX: 265 case JCS_EXT_BGRA: 266 neonfct=jsimd_ycc_extbgrx_convert_neon; 267 break; 268 case JCS_EXT_XBGR: 269 case JCS_EXT_ABGR: 270 neonfct=jsimd_ycc_extxbgr_convert_neon; 271 break; 272 case JCS_EXT_XRGB: 273 case JCS_EXT_ARGB: 274 neonfct=jsimd_ycc_extxrgb_convert_neon; 275 break; 276 default: 277 neonfct=jsimd_ycc_extrgb_convert_neon; 278 break; 279 } 280 281 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows); 282 } 283 284 GLOBAL(void) 285 jsimd_ycc_rgb565_convert (j_decompress_ptr cinfo, 286 JSAMPIMAGE input_buf, JDIMENSION input_row, 287 JSAMPARRAY output_buf, int num_rows) 288 { 289 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row, 290 output_buf, num_rows); 291 } 292 293 GLOBAL(int) 294 jsimd_can_h2v2_downsample (void) 295 { 296 init_simd(); 297 298 return 0; 299 } 300 301 GLOBAL(int) 302 jsimd_can_h2v1_downsample (void) 303 { 304 init_simd(); 305 306 return 0; 307 } 308 309 GLOBAL(void) 310 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, 311 JSAMPARRAY input_data, JSAMPARRAY output_data) 312 { 313 } 314 315 GLOBAL(void) 316 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, 317 JSAMPARRAY input_data, JSAMPARRAY output_data) 318 { 319 } 320 321 GLOBAL(int) 322 jsimd_can_h2v2_upsample (void) 323 { 324 init_simd(); 325 326 return 0; 327 } 328 329 GLOBAL(int) 330 jsimd_can_h2v1_upsample (void) 331 { 332 init_simd(); 333 334 return 0; 335 } 336 337 GLOBAL(void) 338 jsimd_h2v2_upsample (j_decompress_ptr cinfo, 339 jpeg_component_info *compptr, 340 JSAMPARRAY input_data, 341 JSAMPARRAY *output_data_ptr) 342 { 343 } 344 345 GLOBAL(void) 346 jsimd_h2v1_upsample (j_decompress_ptr cinfo, 347 jpeg_component_info *compptr, 348 JSAMPARRAY input_data, 349 JSAMPARRAY *output_data_ptr) 350 { 351 } 352 353 GLOBAL(int) 354 jsimd_can_h2v2_fancy_upsample (void) 355 { 356 init_simd(); 357 358 return 0; 359 } 360 361 GLOBAL(int) 362 jsimd_can_h2v1_fancy_upsample (void) 363 { 364 init_simd(); 365 366 /* The code is optimised for these values only */ 367 if (BITS_IN_JSAMPLE != 8) 368 return 0; 369 if (sizeof(JDIMENSION) != 4) 370 return 0; 371 372 if (simd_support & JSIMD_ARM_NEON) 373 return 1; 374 375 return 0; 376 } 377 378 GLOBAL(void) 379 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, 380 jpeg_component_info *compptr, 381 JSAMPARRAY input_data, 382 JSAMPARRAY *output_data_ptr) 383 { 384 } 385 386 GLOBAL(void) 387 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, 388 jpeg_component_info *compptr, 389 JSAMPARRAY input_data, 390 JSAMPARRAY *output_data_ptr) 391 { 392 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor, 393 compptr->downsampled_width, input_data, 394 output_data_ptr); 395 } 396 397 GLOBAL(int) 398 jsimd_can_h2v2_merged_upsample (void) 399 { 400 init_simd(); 401 402 return 0; 403 } 404 405 GLOBAL(int) 406 jsimd_can_h2v1_merged_upsample (void) 407 { 408 init_simd(); 409 410 return 0; 411 } 412 413 GLOBAL(void) 414 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, 415 JSAMPIMAGE input_buf, 416 JDIMENSION in_row_group_ctr, 417 JSAMPARRAY output_buf) 418 { 419 } 420 421 GLOBAL(void) 422 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, 423 JSAMPIMAGE input_buf, 424 JDIMENSION in_row_group_ctr, 425 JSAMPARRAY output_buf) 426 { 427 } 428 429 GLOBAL(int) 430 jsimd_can_convsamp (void) 431 { 432 init_simd(); 433 434 /* The code is optimised for these values only */ 435 if (DCTSIZE != 8) 436 return 0; 437 if (BITS_IN_JSAMPLE != 8) 438 return 0; 439 if (sizeof(JDIMENSION) != 4) 440 return 0; 441 if (sizeof(DCTELEM) != 2) 442 return 0; 443 444 if (simd_support & JSIMD_ARM_NEON) 445 return 1; 446 447 return 0; 448 } 449 450 GLOBAL(int) 451 jsimd_can_convsamp_float (void) 452 { 453 init_simd(); 454 455 return 0; 456 } 457 458 GLOBAL(void) 459 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, 460 DCTELEM *workspace) 461 { 462 jsimd_convsamp_neon(sample_data, start_col, workspace); 463 } 464 465 GLOBAL(void) 466 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, 467 FAST_FLOAT *workspace) 468 { 469 } 470 471 GLOBAL(int) 472 jsimd_can_fdct_islow (void) 473 { 474 init_simd(); 475 476 return 0; 477 } 478 479 GLOBAL(int) 480 jsimd_can_fdct_ifast (void) 481 { 482 init_simd(); 483 484 /* The code is optimised for these values only */ 485 if (DCTSIZE != 8) 486 return 0; 487 if (sizeof(DCTELEM) != 2) 488 return 0; 489 490 if (simd_support & JSIMD_ARM_NEON) 491 return 1; 492 493 return 0; 494 } 495 496 GLOBAL(int) 497 jsimd_can_fdct_float (void) 498 { 499 init_simd(); 500 501 return 0; 502 } 503 504 GLOBAL(void) 505 jsimd_fdct_islow (DCTELEM *data) 506 { 507 } 508 509 GLOBAL(void) 510 jsimd_fdct_ifast (DCTELEM *data) 511 { 512 jsimd_fdct_ifast_neon(data); 513 } 514 515 GLOBAL(void) 516 jsimd_fdct_float (FAST_FLOAT *data) 517 { 518 } 519 520 GLOBAL(int) 521 jsimd_can_quantize (void) 522 { 523 init_simd(); 524 525 /* The code is optimised for these values only */ 526 if (DCTSIZE != 8) 527 return 0; 528 if (sizeof(JCOEF) != 2) 529 return 0; 530 if (sizeof(DCTELEM) != 2) 531 return 0; 532 533 if (simd_support & JSIMD_ARM_NEON) 534 return 1; 535 536 return 0; 537 } 538 539 GLOBAL(int) 540 jsimd_can_quantize_float (void) 541 { 542 init_simd(); 543 544 return 0; 545 } 546 547 GLOBAL(void) 548 jsimd_quantize (JCOEFPTR coef_block, DCTELEM *divisors, 549 DCTELEM *workspace) 550 { 551 jsimd_quantize_neon(coef_block, divisors, workspace); 552 } 553 554 GLOBAL(void) 555 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT *divisors, 556 FAST_FLOAT *workspace) 557 { 558 } 559 560 GLOBAL(int) 561 jsimd_can_idct_2x2 (void) 562 { 563 init_simd(); 564 565 /* The code is optimised for these values only */ 566 if (DCTSIZE != 8) 567 return 0; 568 if (sizeof(JCOEF) != 2) 569 return 0; 570 if (BITS_IN_JSAMPLE != 8) 571 return 0; 572 if (sizeof(JDIMENSION) != 4) 573 return 0; 574 if (sizeof(ISLOW_MULT_TYPE) != 2) 575 return 0; 576 577 if (simd_support & JSIMD_ARM_NEON) 578 return 1; 579 580 return 0; 581 } 582 583 GLOBAL(int) 584 jsimd_can_idct_4x4 (void) 585 { 586 init_simd(); 587 588 /* The code is optimised for these values only */ 589 if (DCTSIZE != 8) 590 return 0; 591 if (sizeof(JCOEF) != 2) 592 return 0; 593 if (BITS_IN_JSAMPLE != 8) 594 return 0; 595 if (sizeof(JDIMENSION) != 4) 596 return 0; 597 if (sizeof(ISLOW_MULT_TYPE) != 2) 598 return 0; 599 600 if (simd_support & JSIMD_ARM_NEON) 601 return 1; 602 603 return 0; 604 } 605 606 GLOBAL(void) 607 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info *compptr, 608 JCOEFPTR coef_block, JSAMPARRAY output_buf, 609 JDIMENSION output_col) 610 { 611 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, 612 output_col); 613 } 614 615 GLOBAL(void) 616 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info *compptr, 617 JCOEFPTR coef_block, JSAMPARRAY output_buf, 618 JDIMENSION output_col) 619 { 620 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, 621 output_col); 622 } 623 624 GLOBAL(int) 625 jsimd_can_idct_islow (void) 626 { 627 init_simd(); 628 629 /* The code is optimised for these values only */ 630 if (DCTSIZE != 8) 631 return 0; 632 if (sizeof(JCOEF) != 2) 633 return 0; 634 if (BITS_IN_JSAMPLE != 8) 635 return 0; 636 if (sizeof(JDIMENSION) != 4) 637 return 0; 638 if (sizeof(ISLOW_MULT_TYPE) != 2) 639 return 0; 640 641 if (simd_support & JSIMD_ARM_NEON) 642 return 1; 643 644 return 0; 645 } 646 647 GLOBAL(int) 648 jsimd_can_idct_ifast (void) 649 { 650 init_simd(); 651 652 /* The code is optimised for these values only */ 653 if (DCTSIZE != 8) 654 return 0; 655 if (sizeof(JCOEF) != 2) 656 return 0; 657 if (BITS_IN_JSAMPLE != 8) 658 return 0; 659 if (sizeof(JDIMENSION) != 4) 660 return 0; 661 if (sizeof(IFAST_MULT_TYPE) != 2) 662 return 0; 663 if (IFAST_SCALE_BITS != 2) 664 return 0; 665 666 if (simd_support & JSIMD_ARM_NEON) 667 return 1; 668 669 return 0; 670 } 671 672 GLOBAL(int) 673 jsimd_can_idct_float (void) 674 { 675 init_simd(); 676 677 return 0; 678 } 679 680 GLOBAL(void) 681 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info *compptr, 682 JCOEFPTR coef_block, JSAMPARRAY output_buf, 683 JDIMENSION output_col) 684 { 685 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, 686 output_col); 687 } 688 689 GLOBAL(void) 690 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info *compptr, 691 JCOEFPTR coef_block, JSAMPARRAY output_buf, 692 JDIMENSION output_col) 693 { 694 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, 695 output_col); 696 } 697 698 GLOBAL(void) 699 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info *compptr, 700 JCOEFPTR coef_block, JSAMPARRAY output_buf, 701 JDIMENSION output_col) 702 { 703 } 704 705 GLOBAL(int) 706 jsimd_can_huff_encode_one_block (void) 707 { 708 init_simd(); 709 710 if (DCTSIZE != 8) 711 return 0; 712 if (sizeof(JCOEF) != 2) 713 return 0; 714 715 if (simd_support & JSIMD_ARM_NEON && simd_huffman) 716 return 1; 717 718 return 0; 719 } 720 721 GLOBAL(JOCTET*) 722 jsimd_huff_encode_one_block (void *state, JOCTET *buffer, JCOEFPTR block, 723 int last_dc_val, c_derived_tbl *dctbl, 724 c_derived_tbl *actbl) 725 { 726 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val, 727 dctbl, actbl); 728 } 729