1 /* 2 * jsimd_arm.c 3 * 4 * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB 5 * Copyright 2009-2011 D. R. Commander 6 * 7 * Based on the x86 SIMD extension for IJG JPEG library, 8 * Copyright (C) 1999-2006, MIYASAKA Masaru. 9 * For conditions of distribution and use, see copyright notice in jsimdext.inc 10 * 11 * This file contains the interface between the "normal" portions 12 * of the library and the SIMD implementations when running on 13 * ARM architecture. 14 * 15 * Based on the stubs from 'jsimd_none.c' 16 */ 17 18 #define JPEG_INTERNALS 19 #include "../jinclude.h" 20 #include "../jpeglib.h" 21 #include "../jsimd.h" 22 #include "../jdct.h" 23 #include "../jsimddct.h" 24 #include "jsimd.h" 25 26 #include <stdio.h> 27 #include <string.h> 28 #include <ctype.h> 29 30 static unsigned int simd_support = ~0; 31 32 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) 33 34 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024) 35 36 LOCAL(int) 37 check_feature (char *buffer, char *feature) 38 { 39 char *p; 40 if (*feature == 0) 41 return 0; 42 if (strncmp(buffer, "Features", 8) != 0) 43 return 0; 44 buffer += 8; 45 while (isspace(*buffer)) 46 buffer++; 47 48 /* Check if 'feature' is present in the buffer as a separate word */ 49 while ((p = strstr(buffer, feature))) { 50 if (p > buffer && !isspace(*(p - 1))) { 51 buffer++; 52 continue; 53 } 54 p += strlen(feature); 55 if (*p != 0 && !isspace(*p)) { 56 buffer++; 57 continue; 58 } 59 return 1; 60 } 61 return 0; 62 } 63 64 LOCAL(int) 65 parse_proc_cpuinfo (int bufsize) 66 { 67 char *buffer = (char *)malloc(bufsize); 68 FILE *fd; 69 simd_support = 0; 70 71 if (!buffer) 72 return 0; 73 74 fd = fopen("/proc/cpuinfo", "r"); 75 if (fd) { 76 while (fgets(buffer, bufsize, fd)) { 77 if (!strchr(buffer, '\n') && !feof(fd)) { 78 /* "impossible" happened - insufficient size of the buffer! */ 79 fclose(fd); 80 free(buffer); 81 return 0; 82 } 83 if (check_feature(buffer, "neon")) 84 simd_support |= JSIMD_ARM_NEON; 85 } 86 fclose(fd); 87 } 88 free(buffer); 89 return 1; 90 } 91 92 #endif 93 94 /* 95 * Check what SIMD accelerations are supported. 96 * 97 * FIXME: This code is racy under a multi-threaded environment. 98 */ 99 LOCAL(void) 100 init_simd (void) 101 { 102 char *env = NULL; 103 #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) 104 int bufsize = 1024; /* an initial guess for the line buffer size limit */ 105 #endif 106 107 if (simd_support != ~0U) 108 return; 109 110 simd_support = 0; 111 112 #if defined(__ARM_NEON__) 113 simd_support |= JSIMD_ARM_NEON; 114 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__) 115 /* We still have a chance to use NEON regardless of globally used 116 * -mcpu/-mfpu options passed to gcc by performing runtime detection via 117 * /proc/cpuinfo parsing on linux/android */ 118 while (!parse_proc_cpuinfo(bufsize)) { 119 bufsize *= 2; 120 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT) 121 break; 122 } 123 #endif 124 125 /* Force different settings through environment variables */ 126 env = getenv("JSIMD_FORCE_ARM_NEON"); 127 if ((env != NULL) && (strcmp(env, "1") == 0)) 128 simd_support &= JSIMD_ARM_NEON; 129 env = getenv("JSIMD_FORCE_NO_SIMD"); 130 if ((env != NULL) && (strcmp(env, "1") == 0)) 131 simd_support = 0; 132 } 133 134 GLOBAL(int) 135 jsimd_can_rgb_ycc (void) 136 { 137 init_simd(); 138 139 /* The code is optimised for these values only */ 140 if (BITS_IN_JSAMPLE != 8) 141 return 0; 142 if (sizeof(JDIMENSION) != 4) 143 return 0; 144 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 145 return 0; 146 147 if (simd_support & JSIMD_ARM_NEON) 148 return 1; 149 150 return 0; 151 } 152 153 GLOBAL(int) 154 jsimd_can_rgb_gray (void) 155 { 156 init_simd(); 157 158 return 0; 159 } 160 161 GLOBAL(int) 162 jsimd_can_ycc_rgb (void) 163 { 164 init_simd(); 165 166 /* The code is optimised for these values only */ 167 if (BITS_IN_JSAMPLE != 8) 168 return 0; 169 if (sizeof(JDIMENSION) != 4) 170 return 0; 171 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4)) 172 return 0; 173 if (simd_support & JSIMD_ARM_NEON) 174 return 1; 175 176 return 0; 177 } 178 179 GLOBAL(void) 180 jsimd_rgb_ycc_convert (j_compress_ptr cinfo, 181 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 182 JDIMENSION output_row, int num_rows) 183 { 184 void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); 185 186 switch(cinfo->in_color_space) 187 { 188 case JCS_EXT_RGB: 189 neonfct=jsimd_extrgb_ycc_convert_neon; 190 break; 191 case JCS_EXT_RGBX: 192 case JCS_EXT_RGBA: 193 neonfct=jsimd_extrgbx_ycc_convert_neon; 194 break; 195 case JCS_EXT_BGR: 196 neonfct=jsimd_extbgr_ycc_convert_neon; 197 break; 198 case JCS_EXT_BGRX: 199 case JCS_EXT_BGRA: 200 neonfct=jsimd_extbgrx_ycc_convert_neon; 201 break; 202 case JCS_EXT_XBGR: 203 case JCS_EXT_ABGR: 204 neonfct=jsimd_extxbgr_ycc_convert_neon; 205 break; 206 case JCS_EXT_XRGB: 207 case JCS_EXT_ARGB: 208 neonfct=jsimd_extxrgb_ycc_convert_neon; 209 break; 210 default: 211 neonfct=jsimd_extrgb_ycc_convert_neon; 212 break; 213 } 214 215 if (simd_support & JSIMD_ARM_NEON) 216 neonfct(cinfo->image_width, input_buf, 217 output_buf, output_row, num_rows); 218 } 219 220 GLOBAL(void) 221 jsimd_rgb_gray_convert (j_compress_ptr cinfo, 222 JSAMPARRAY input_buf, JSAMPIMAGE output_buf, 223 JDIMENSION output_row, int num_rows) 224 { 225 } 226 227 GLOBAL(void) 228 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo, 229 JSAMPIMAGE input_buf, JDIMENSION input_row, 230 JSAMPARRAY output_buf, int num_rows) 231 { 232 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); 233 234 switch(cinfo->out_color_space) 235 { 236 case JCS_EXT_RGB: 237 neonfct=jsimd_ycc_extrgb_convert_neon; 238 break; 239 case JCS_EXT_RGBX: 240 case JCS_EXT_RGBA: 241 neonfct=jsimd_ycc_extrgbx_convert_neon; 242 break; 243 case JCS_EXT_BGR: 244 neonfct=jsimd_ycc_extbgr_convert_neon; 245 break; 246 case JCS_EXT_BGRX: 247 case JCS_EXT_BGRA: 248 neonfct=jsimd_ycc_extbgrx_convert_neon; 249 break; 250 case JCS_EXT_XBGR: 251 case JCS_EXT_ABGR: 252 neonfct=jsimd_ycc_extxbgr_convert_neon; 253 break; 254 case JCS_EXT_XRGB: 255 case JCS_EXT_ARGB: 256 neonfct=jsimd_ycc_extxrgb_convert_neon; 257 break; 258 default: 259 neonfct=jsimd_ycc_extrgb_convert_neon; 260 break; 261 } 262 263 if (simd_support & JSIMD_ARM_NEON) 264 neonfct(cinfo->output_width, input_buf, 265 input_row, output_buf, num_rows); 266 } 267 268 GLOBAL(int) 269 jsimd_can_h2v2_downsample (void) 270 { 271 init_simd(); 272 273 return 0; 274 } 275 276 GLOBAL(int) 277 jsimd_can_h2v1_downsample (void) 278 { 279 init_simd(); 280 281 return 0; 282 } 283 284 GLOBAL(void) 285 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 286 JSAMPARRAY input_data, JSAMPARRAY output_data) 287 { 288 } 289 290 GLOBAL(void) 291 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, 292 JSAMPARRAY input_data, JSAMPARRAY output_data) 293 { 294 } 295 296 GLOBAL(int) 297 jsimd_can_h2v2_upsample (void) 298 { 299 init_simd(); 300 301 return 0; 302 } 303 304 GLOBAL(int) 305 jsimd_can_h2v1_upsample (void) 306 { 307 init_simd(); 308 309 return 0; 310 } 311 312 GLOBAL(void) 313 jsimd_h2v2_upsample (j_decompress_ptr cinfo, 314 jpeg_component_info * compptr, 315 JSAMPARRAY input_data, 316 JSAMPARRAY * output_data_ptr) 317 { 318 } 319 320 GLOBAL(void) 321 jsimd_h2v1_upsample (j_decompress_ptr cinfo, 322 jpeg_component_info * compptr, 323 JSAMPARRAY input_data, 324 JSAMPARRAY * output_data_ptr) 325 { 326 } 327 328 GLOBAL(int) 329 jsimd_can_h2v2_fancy_upsample (void) 330 { 331 init_simd(); 332 333 return 0; 334 } 335 336 GLOBAL(int) 337 jsimd_can_h2v1_fancy_upsample (void) 338 { 339 init_simd(); 340 341 /* The code is optimised for these values only */ 342 if (BITS_IN_JSAMPLE != 8) 343 return 0; 344 if (sizeof(JDIMENSION) != 4) 345 return 0; 346 347 if (simd_support & JSIMD_ARM_NEON) 348 return 1; 349 350 return 0; 351 } 352 353 GLOBAL(void) 354 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo, 355 jpeg_component_info * compptr, 356 JSAMPARRAY input_data, 357 JSAMPARRAY * output_data_ptr) 358 { 359 } 360 361 GLOBAL(void) 362 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo, 363 jpeg_component_info * compptr, 364 JSAMPARRAY input_data, 365 JSAMPARRAY * output_data_ptr) 366 { 367 if (simd_support & JSIMD_ARM_NEON) 368 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor, 369 compptr->downsampled_width, input_data, output_data_ptr); 370 } 371 372 GLOBAL(int) 373 jsimd_can_h2v2_merged_upsample (void) 374 { 375 init_simd(); 376 377 return 0; 378 } 379 380 GLOBAL(int) 381 jsimd_can_h2v1_merged_upsample (void) 382 { 383 init_simd(); 384 385 return 0; 386 } 387 388 GLOBAL(void) 389 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo, 390 JSAMPIMAGE input_buf, 391 JDIMENSION in_row_group_ctr, 392 JSAMPARRAY output_buf) 393 { 394 } 395 396 GLOBAL(void) 397 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo, 398 JSAMPIMAGE input_buf, 399 JDIMENSION in_row_group_ctr, 400 JSAMPARRAY output_buf) 401 { 402 } 403 404 GLOBAL(int) 405 jsimd_can_convsamp (void) 406 { 407 init_simd(); 408 409 /* The code is optimised for these values only */ 410 if (DCTSIZE != 8) 411 return 0; 412 if (BITS_IN_JSAMPLE != 8) 413 return 0; 414 if (sizeof(JDIMENSION) != 4) 415 return 0; 416 if (sizeof(DCTELEM) != 2) 417 return 0; 418 419 if (simd_support & JSIMD_ARM_NEON) 420 return 1; 421 422 return 0; 423 } 424 425 GLOBAL(int) 426 jsimd_can_convsamp_float (void) 427 { 428 init_simd(); 429 430 return 0; 431 } 432 433 GLOBAL(void) 434 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, 435 DCTELEM * workspace) 436 { 437 if (simd_support & JSIMD_ARM_NEON) 438 jsimd_convsamp_neon(sample_data, start_col, workspace); 439 } 440 441 GLOBAL(void) 442 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, 443 FAST_FLOAT * workspace) 444 { 445 } 446 447 GLOBAL(int) 448 jsimd_can_fdct_islow (void) 449 { 450 init_simd(); 451 452 return 0; 453 } 454 455 GLOBAL(int) 456 jsimd_can_fdct_ifast (void) 457 { 458 init_simd(); 459 460 /* The code is optimised for these values only */ 461 if (DCTSIZE != 8) 462 return 0; 463 if (sizeof(DCTELEM) != 2) 464 return 0; 465 466 if (simd_support & JSIMD_ARM_NEON) 467 return 1; 468 469 return 0; 470 } 471 472 GLOBAL(int) 473 jsimd_can_fdct_float (void) 474 { 475 init_simd(); 476 477 return 0; 478 } 479 480 GLOBAL(void) 481 jsimd_fdct_islow (DCTELEM * data) 482 { 483 } 484 485 GLOBAL(void) 486 jsimd_fdct_ifast (DCTELEM * data) 487 { 488 if (simd_support & JSIMD_ARM_NEON) 489 jsimd_fdct_ifast_neon(data); 490 } 491 492 GLOBAL(void) 493 jsimd_fdct_float (FAST_FLOAT * data) 494 { 495 } 496 497 GLOBAL(int) 498 jsimd_can_quantize (void) 499 { 500 init_simd(); 501 502 /* The code is optimised for these values only */ 503 if (DCTSIZE != 8) 504 return 0; 505 if (sizeof(JCOEF) != 2) 506 return 0; 507 if (sizeof(DCTELEM) != 2) 508 return 0; 509 510 if (simd_support & JSIMD_ARM_NEON) 511 return 1; 512 513 return 0; 514 } 515 516 GLOBAL(int) 517 jsimd_can_quantize_float (void) 518 { 519 init_simd(); 520 521 return 0; 522 } 523 524 GLOBAL(void) 525 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors, 526 DCTELEM * workspace) 527 { 528 if (simd_support & JSIMD_ARM_NEON) 529 jsimd_quantize_neon(coef_block, divisors, workspace); 530 } 531 532 GLOBAL(void) 533 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, 534 FAST_FLOAT * workspace) 535 { 536 } 537 538 GLOBAL(int) 539 jsimd_can_idct_2x2 (void) 540 { 541 init_simd(); 542 543 /* The code is optimised for these values only */ 544 if (DCTSIZE != 8) 545 return 0; 546 if (sizeof(JCOEF) != 2) 547 return 0; 548 if (BITS_IN_JSAMPLE != 8) 549 return 0; 550 if (sizeof(JDIMENSION) != 4) 551 return 0; 552 if (sizeof(ISLOW_MULT_TYPE) != 2) 553 return 0; 554 555 if ((simd_support & JSIMD_ARM_NEON)) 556 return 1; 557 558 return 0; 559 } 560 561 GLOBAL(int) 562 jsimd_can_idct_4x4 (void) 563 { 564 init_simd(); 565 566 /* The code is optimised for these values only */ 567 if (DCTSIZE != 8) 568 return 0; 569 if (sizeof(JCOEF) != 2) 570 return 0; 571 if (BITS_IN_JSAMPLE != 8) 572 return 0; 573 if (sizeof(JDIMENSION) != 4) 574 return 0; 575 if (sizeof(ISLOW_MULT_TYPE) != 2) 576 return 0; 577 578 if ((simd_support & JSIMD_ARM_NEON)) 579 return 1; 580 581 return 0; 582 } 583 584 GLOBAL(void) 585 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 586 JCOEFPTR coef_block, JSAMPARRAY output_buf, 587 JDIMENSION output_col) 588 { 589 if ((simd_support & JSIMD_ARM_NEON)) 590 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col); 591 } 592 593 GLOBAL(void) 594 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 595 JCOEFPTR coef_block, JSAMPARRAY output_buf, 596 JDIMENSION output_col) 597 { 598 if ((simd_support & JSIMD_ARM_NEON)) 599 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col); 600 } 601 602 GLOBAL(int) 603 jsimd_can_idct_islow (void) 604 { 605 init_simd(); 606 607 /* The code is optimised for these values only */ 608 if (DCTSIZE != 8) 609 return 0; 610 if (sizeof(JCOEF) != 2) 611 return 0; 612 if (BITS_IN_JSAMPLE != 8) 613 return 0; 614 if (sizeof(JDIMENSION) != 4) 615 return 0; 616 if (sizeof(ISLOW_MULT_TYPE) != 2) 617 return 0; 618 619 if (simd_support & JSIMD_ARM_NEON) 620 return 1; 621 622 return 0; 623 } 624 625 GLOBAL(int) 626 jsimd_can_idct_ifast (void) 627 { 628 init_simd(); 629 630 /* The code is optimised for these values only */ 631 if (DCTSIZE != 8) 632 return 0; 633 if (sizeof(JCOEF) != 2) 634 return 0; 635 if (BITS_IN_JSAMPLE != 8) 636 return 0; 637 if (sizeof(JDIMENSION) != 4) 638 return 0; 639 if (sizeof(IFAST_MULT_TYPE) != 2) 640 return 0; 641 if (IFAST_SCALE_BITS != 2) 642 return 0; 643 644 if ((simd_support & JSIMD_ARM_NEON)) 645 return 1; 646 647 return 0; 648 } 649 650 GLOBAL(int) 651 jsimd_can_idct_float (void) 652 { 653 init_simd(); 654 655 return 0; 656 } 657 658 GLOBAL(void) 659 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, 660 JCOEFPTR coef_block, JSAMPARRAY output_buf, 661 JDIMENSION output_col) 662 { 663 if ((simd_support & JSIMD_ARM_NEON)) 664 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col); 665 } 666 667 GLOBAL(void) 668 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr, 669 JCOEFPTR coef_block, JSAMPARRAY output_buf, 670 JDIMENSION output_col) 671 { 672 if ((simd_support & JSIMD_ARM_NEON)) 673 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col); 674 } 675 676 GLOBAL(void) 677 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, 678 JCOEFPTR coef_block, JSAMPARRAY output_buf, 679 JDIMENSION output_col) 680 { 681 } 682 683