1 /* 2 * jcsample.c 3 * 4 * This file was part of the Independent JPEG Group's software: 5 * Copyright (C) 1991-1996, Thomas G. Lane. 6 * libjpeg-turbo Modifications: 7 * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB 8 * Copyright (C) 2014, MIPS Technologies, Inc., California. 9 * Copyright (C) 2015, D. R. Commander. 10 * For conditions of distribution and use, see the accompanying README.ijg 11 * file. 12 * 13 * This file contains downsampling routines. 14 * 15 * Downsampling input data is counted in "row groups". A row group 16 * is defined to be max_v_samp_factor pixel rows of each component, 17 * from which the downsampler produces v_samp_factor sample rows. 18 * A single row group is processed in each call to the downsampler module. 19 * 20 * The downsampler is responsible for edge-expansion of its output data 21 * to fill an integral number of DCT blocks horizontally. The source buffer 22 * may be modified if it is helpful for this purpose (the source buffer is 23 * allocated wide enough to correspond to the desired output width). 24 * The caller (the prep controller) is responsible for vertical padding. 25 * 26 * The downsampler may request "context rows" by setting need_context_rows 27 * during startup. In this case, the input arrays will contain at least 28 * one row group's worth of pixels above and below the passed-in data; 29 * the caller will create dummy rows at image top and bottom by replicating 30 * the first or last real pixel row. 31 * 32 * An excellent reference for image resampling is 33 * Digital Image Warping, George Wolberg, 1990. 34 * Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7. 35 * 36 * The downsampling algorithm used here is a simple average of the source 37 * pixels covered by the output pixel. The hi-falutin sampling literature 38 * refers to this as a "box filter". In general the characteristics of a box 39 * filter are not very good, but for the specific cases we normally use (1:1 40 * and 2:1 ratios) the box is equivalent to a "triangle filter" which is not 41 * nearly so bad. If you intend to use other sampling ratios, you'd be well 42 * advised to improve this code. 43 * 44 * A simple input-smoothing capability is provided. This is mainly intended 45 * for cleaning up color-dithered GIF input files (if you find it inadequate, 46 * we suggest using an external filtering program such as pnmconvol). When 47 * enabled, each input pixel P is replaced by a weighted sum of itself and its 48 * eight neighbors. P's weight is 1-8*SF and each neighbor's weight is SF, 49 * where SF = (smoothing_factor / 1024). 50 * Currently, smoothing is only supported for 2h2v sampling factors. 51 */ 52 53 #define JPEG_INTERNALS 54 #include "jinclude.h" 55 #include "jpeglib.h" 56 #include "jsimd.h" 57 58 59 /* Pointer to routine to downsample a single component */ 60 typedef void (*downsample1_ptr) (j_compress_ptr cinfo, 61 jpeg_component_info *compptr, 62 JSAMPARRAY input_data, 63 JSAMPARRAY output_data); 64 65 /* Private subobject */ 66 67 typedef struct { 68 struct jpeg_downsampler pub; /* public fields */ 69 70 /* Downsampling method pointers, one per component */ 71 downsample1_ptr methods[MAX_COMPONENTS]; 72 } my_downsampler; 73 74 typedef my_downsampler *my_downsample_ptr; 75 76 77 /* 78 * Initialize for a downsampling pass. 79 */ 80 81 METHODDEF(void) 82 start_pass_downsample (j_compress_ptr cinfo) 83 { 84 /* no work for now */ 85 } 86 87 88 /* 89 * Expand a component horizontally from width input_cols to width output_cols, 90 * by duplicating the rightmost samples. 91 */ 92 93 LOCAL(void) 94 expand_right_edge (JSAMPARRAY image_data, int num_rows, 95 JDIMENSION input_cols, JDIMENSION output_cols) 96 { 97 register JSAMPROW ptr; 98 register JSAMPLE pixval; 99 register int count; 100 int row; 101 int numcols = (int) (output_cols - input_cols); 102 103 if (numcols > 0) { 104 for (row = 0; row < num_rows; row++) { 105 ptr = image_data[row] + input_cols; 106 pixval = ptr[-1]; /* don't need GETJSAMPLE() here */ 107 for (count = numcols; count > 0; count--) 108 *ptr++ = pixval; 109 } 110 } 111 } 112 113 114 /* 115 * Do downsampling for a whole row group (all components). 116 * 117 * In this version we simply downsample each component independently. 118 */ 119 120 METHODDEF(void) 121 sep_downsample (j_compress_ptr cinfo, 122 JSAMPIMAGE input_buf, JDIMENSION in_row_index, 123 JSAMPIMAGE output_buf, JDIMENSION out_row_group_index) 124 { 125 my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample; 126 int ci; 127 jpeg_component_info *compptr; 128 JSAMPARRAY in_ptr, out_ptr; 129 130 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; 131 ci++, compptr++) { 132 in_ptr = input_buf[ci] + in_row_index; 133 out_ptr = output_buf[ci] + (out_row_group_index * compptr->v_samp_factor); 134 (*downsample->methods[ci]) (cinfo, compptr, in_ptr, out_ptr); 135 } 136 } 137 138 139 /* 140 * Downsample pixel values of a single component. 141 * One row group is processed per call. 142 * This version handles arbitrary integral sampling ratios, without smoothing. 143 * Note that this version is not actually used for customary sampling ratios. 144 */ 145 146 METHODDEF(void) 147 int_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, 148 JSAMPARRAY input_data, JSAMPARRAY output_data) 149 { 150 int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v; 151 JDIMENSION outcol, outcol_h; /* outcol_h == outcol*h_expand */ 152 JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; 153 JSAMPROW inptr, outptr; 154 JLONG outvalue; 155 156 h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor; 157 v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor; 158 numpix = h_expand * v_expand; 159 numpix2 = numpix/2; 160 161 /* Expand input data enough to let all the output samples be generated 162 * by the standard loop. Special-casing padded output would be more 163 * efficient. 164 */ 165 expand_right_edge(input_data, cinfo->max_v_samp_factor, 166 cinfo->image_width, output_cols * h_expand); 167 168 inrow = 0; 169 for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { 170 outptr = output_data[outrow]; 171 for (outcol = 0, outcol_h = 0; outcol < output_cols; 172 outcol++, outcol_h += h_expand) { 173 outvalue = 0; 174 for (v = 0; v < v_expand; v++) { 175 inptr = input_data[inrow+v] + outcol_h; 176 for (h = 0; h < h_expand; h++) { 177 outvalue += (JLONG) GETJSAMPLE(*inptr++); 178 } 179 } 180 *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix); 181 } 182 inrow += v_expand; 183 } 184 } 185 186 187 /* 188 * Downsample pixel values of a single component. 189 * This version handles the special case of a full-size component, 190 * without smoothing. 191 */ 192 193 METHODDEF(void) 194 fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, 195 JSAMPARRAY input_data, JSAMPARRAY output_data) 196 { 197 /* Copy the data */ 198 jcopy_sample_rows(input_data, 0, output_data, 0, 199 cinfo->max_v_samp_factor, cinfo->image_width); 200 /* Edge-expand */ 201 expand_right_edge(output_data, cinfo->max_v_samp_factor, 202 cinfo->image_width, compptr->width_in_blocks * DCTSIZE); 203 } 204 205 206 /* 207 * Downsample pixel values of a single component. 208 * This version handles the common case of 2:1 horizontal and 1:1 vertical, 209 * without smoothing. 210 * 211 * A note about the "bias" calculations: when rounding fractional values to 212 * integer, we do not want to always round 0.5 up to the next integer. 213 * If we did that, we'd introduce a noticeable bias towards larger values. 214 * Instead, this code is arranged so that 0.5 will be rounded up or down at 215 * alternate pixel locations (a simple ordered dither pattern). 216 */ 217 218 METHODDEF(void) 219 h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, 220 JSAMPARRAY input_data, JSAMPARRAY output_data) 221 { 222 int outrow; 223 JDIMENSION outcol; 224 JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; 225 register JSAMPROW inptr, outptr; 226 register int bias; 227 228 /* Expand input data enough to let all the output samples be generated 229 * by the standard loop. Special-casing padded output would be more 230 * efficient. 231 */ 232 expand_right_edge(input_data, cinfo->max_v_samp_factor, 233 cinfo->image_width, output_cols * 2); 234 235 for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { 236 outptr = output_data[outrow]; 237 inptr = input_data[outrow]; 238 bias = 0; /* bias = 0,1,0,1,... for successive samples */ 239 for (outcol = 0; outcol < output_cols; outcol++) { 240 *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1]) 241 + bias) >> 1); 242 bias ^= 1; /* 0=>1, 1=>0 */ 243 inptr += 2; 244 } 245 } 246 } 247 248 249 /* 250 * Downsample pixel values of a single component. 251 * This version handles the standard case of 2:1 horizontal and 2:1 vertical, 252 * without smoothing. 253 */ 254 255 METHODDEF(void) 256 h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, 257 JSAMPARRAY input_data, JSAMPARRAY output_data) 258 { 259 int inrow, outrow; 260 JDIMENSION outcol; 261 JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; 262 register JSAMPROW inptr0, inptr1, outptr; 263 register int bias; 264 265 /* Expand input data enough to let all the output samples be generated 266 * by the standard loop. Special-casing padded output would be more 267 * efficient. 268 */ 269 expand_right_edge(input_data, cinfo->max_v_samp_factor, 270 cinfo->image_width, output_cols * 2); 271 272 inrow = 0; 273 for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { 274 outptr = output_data[outrow]; 275 inptr0 = input_data[inrow]; 276 inptr1 = input_data[inrow+1]; 277 bias = 1; /* bias = 1,2,1,2,... for successive samples */ 278 for (outcol = 0; outcol < output_cols; outcol++) { 279 *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + 280 GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]) 281 + bias) >> 2); 282 bias ^= 3; /* 1=>2, 2=>1 */ 283 inptr0 += 2; inptr1 += 2; 284 } 285 inrow += 2; 286 } 287 } 288 289 290 #ifdef INPUT_SMOOTHING_SUPPORTED 291 292 /* 293 * Downsample pixel values of a single component. 294 * This version handles the standard case of 2:1 horizontal and 2:1 vertical, 295 * with smoothing. One row of context is required. 296 */ 297 298 METHODDEF(void) 299 h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, 300 JSAMPARRAY input_data, JSAMPARRAY output_data) 301 { 302 int inrow, outrow; 303 JDIMENSION colctr; 304 JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; 305 register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr; 306 JLONG membersum, neighsum, memberscale, neighscale; 307 308 /* Expand input data enough to let all the output samples be generated 309 * by the standard loop. Special-casing padded output would be more 310 * efficient. 311 */ 312 expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2, 313 cinfo->image_width, output_cols * 2); 314 315 /* We don't bother to form the individual "smoothed" input pixel values; 316 * we can directly compute the output which is the average of the four 317 * smoothed values. Each of the four member pixels contributes a fraction 318 * (1-8*SF) to its own smoothed image and a fraction SF to each of the three 319 * other smoothed pixels, therefore a total fraction (1-5*SF)/4 to the final 320 * output. The four corner-adjacent neighbor pixels contribute a fraction 321 * SF to just one smoothed pixel, or SF/4 to the final output; while the 322 * eight edge-adjacent neighbors contribute SF to each of two smoothed 323 * pixels, or SF/2 overall. In order to use integer arithmetic, these 324 * factors are scaled by 2^16 = 65536. 325 * Also recall that SF = smoothing_factor / 1024. 326 */ 327 328 memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */ 329 neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */ 330 331 inrow = 0; 332 for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { 333 outptr = output_data[outrow]; 334 inptr0 = input_data[inrow]; 335 inptr1 = input_data[inrow+1]; 336 above_ptr = input_data[inrow-1]; 337 below_ptr = input_data[inrow+2]; 338 339 /* Special case for first column: pretend column -1 is same as column 0 */ 340 membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + 341 GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); 342 neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + 343 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + 344 GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[2]) + 345 GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[2]); 346 neighsum += neighsum; 347 neighsum += GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[2]) + 348 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[2]); 349 membersum = membersum * memberscale + neighsum * neighscale; 350 *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); 351 inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2; 352 353 for (colctr = output_cols - 2; colctr > 0; colctr--) { 354 /* sum of pixels directly mapped to this output element */ 355 membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + 356 GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); 357 /* sum of edge-neighbor pixels */ 358 neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + 359 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + 360 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[2]) + 361 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[2]); 362 /* The edge-neighbors count twice as much as corner-neighbors */ 363 neighsum += neighsum; 364 /* Add in the corner-neighbors */ 365 neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[2]) + 366 GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[2]); 367 /* form final output scaled up by 2^16 */ 368 membersum = membersum * memberscale + neighsum * neighscale; 369 /* round, descale and output it */ 370 *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); 371 inptr0 += 2; inptr1 += 2; above_ptr += 2; below_ptr += 2; 372 } 373 374 /* Special case for last column */ 375 membersum = GETJSAMPLE(*inptr0) + GETJSAMPLE(inptr0[1]) + 376 GETJSAMPLE(*inptr1) + GETJSAMPLE(inptr1[1]); 377 neighsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(above_ptr[1]) + 378 GETJSAMPLE(*below_ptr) + GETJSAMPLE(below_ptr[1]) + 379 GETJSAMPLE(inptr0[-1]) + GETJSAMPLE(inptr0[1]) + 380 GETJSAMPLE(inptr1[-1]) + GETJSAMPLE(inptr1[1]); 381 neighsum += neighsum; 382 neighsum += GETJSAMPLE(above_ptr[-1]) + GETJSAMPLE(above_ptr[1]) + 383 GETJSAMPLE(below_ptr[-1]) + GETJSAMPLE(below_ptr[1]); 384 membersum = membersum * memberscale + neighsum * neighscale; 385 *outptr = (JSAMPLE) ((membersum + 32768) >> 16); 386 387 inrow += 2; 388 } 389 } 390 391 392 /* 393 * Downsample pixel values of a single component. 394 * This version handles the special case of a full-size component, 395 * with smoothing. One row of context is required. 396 */ 397 398 METHODDEF(void) 399 fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, 400 JSAMPARRAY input_data, JSAMPARRAY output_data) 401 { 402 int outrow; 403 JDIMENSION colctr; 404 JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; 405 register JSAMPROW inptr, above_ptr, below_ptr, outptr; 406 JLONG membersum, neighsum, memberscale, neighscale; 407 int colsum, lastcolsum, nextcolsum; 408 409 /* Expand input data enough to let all the output samples be generated 410 * by the standard loop. Special-casing padded output would be more 411 * efficient. 412 */ 413 expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2, 414 cinfo->image_width, output_cols); 415 416 /* Each of the eight neighbor pixels contributes a fraction SF to the 417 * smoothed pixel, while the main pixel contributes (1-8*SF). In order 418 * to use integer arithmetic, these factors are multiplied by 2^16 = 65536. 419 * Also recall that SF = smoothing_factor / 1024. 420 */ 421 422 memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */ 423 neighscale = cinfo->smoothing_factor * 64; /* scaled SF */ 424 425 for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { 426 outptr = output_data[outrow]; 427 inptr = input_data[outrow]; 428 above_ptr = input_data[outrow-1]; 429 below_ptr = input_data[outrow+1]; 430 431 /* Special case for first column */ 432 colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) + 433 GETJSAMPLE(*inptr); 434 membersum = GETJSAMPLE(*inptr++); 435 nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) + 436 GETJSAMPLE(*inptr); 437 neighsum = colsum + (colsum - membersum) + nextcolsum; 438 membersum = membersum * memberscale + neighsum * neighscale; 439 *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); 440 lastcolsum = colsum; colsum = nextcolsum; 441 442 for (colctr = output_cols - 2; colctr > 0; colctr--) { 443 membersum = GETJSAMPLE(*inptr++); 444 above_ptr++; below_ptr++; 445 nextcolsum = GETJSAMPLE(*above_ptr) + GETJSAMPLE(*below_ptr) + 446 GETJSAMPLE(*inptr); 447 neighsum = lastcolsum + (colsum - membersum) + nextcolsum; 448 membersum = membersum * memberscale + neighsum * neighscale; 449 *outptr++ = (JSAMPLE) ((membersum + 32768) >> 16); 450 lastcolsum = colsum; colsum = nextcolsum; 451 } 452 453 /* Special case for last column */ 454 membersum = GETJSAMPLE(*inptr); 455 neighsum = lastcolsum + (colsum - membersum) + colsum; 456 membersum = membersum * memberscale + neighsum * neighscale; 457 *outptr = (JSAMPLE) ((membersum + 32768) >> 16); 458 459 } 460 } 461 462 #endif /* INPUT_SMOOTHING_SUPPORTED */ 463 464 465 /* 466 * Module initialization routine for downsampling. 467 * Note that we must select a routine for each component. 468 */ 469 470 GLOBAL(void) 471 jinit_downsampler (j_compress_ptr cinfo) 472 { 473 my_downsample_ptr downsample; 474 int ci; 475 jpeg_component_info *compptr; 476 boolean smoothok = TRUE; 477 478 downsample = (my_downsample_ptr) 479 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 480 sizeof(my_downsampler)); 481 cinfo->downsample = (struct jpeg_downsampler *) downsample; 482 downsample->pub.start_pass = start_pass_downsample; 483 downsample->pub.downsample = sep_downsample; 484 downsample->pub.need_context_rows = FALSE; 485 486 if (cinfo->CCIR601_sampling) 487 ERREXIT(cinfo, JERR_CCIR601_NOTIMPL); 488 489 /* Verify we can handle the sampling factors, and set up method pointers */ 490 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; 491 ci++, compptr++) { 492 if (compptr->h_samp_factor == cinfo->max_h_samp_factor && 493 compptr->v_samp_factor == cinfo->max_v_samp_factor) { 494 #ifdef INPUT_SMOOTHING_SUPPORTED 495 if (cinfo->smoothing_factor) { 496 downsample->methods[ci] = fullsize_smooth_downsample; 497 downsample->pub.need_context_rows = TRUE; 498 } else 499 #endif 500 downsample->methods[ci] = fullsize_downsample; 501 } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor && 502 compptr->v_samp_factor == cinfo->max_v_samp_factor) { 503 smoothok = FALSE; 504 if (jsimd_can_h2v1_downsample()) 505 downsample->methods[ci] = jsimd_h2v1_downsample; 506 else 507 downsample->methods[ci] = h2v1_downsample; 508 } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor && 509 compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) { 510 #ifdef INPUT_SMOOTHING_SUPPORTED 511 if (cinfo->smoothing_factor) { 512 #if defined(__mips__) 513 if (jsimd_can_h2v2_smooth_downsample()) 514 downsample->methods[ci] = jsimd_h2v2_smooth_downsample; 515 else 516 #endif 517 downsample->methods[ci] = h2v2_smooth_downsample; 518 downsample->pub.need_context_rows = TRUE; 519 } else 520 #endif 521 { 522 if (jsimd_can_h2v2_downsample()) 523 downsample->methods[ci] = jsimd_h2v2_downsample; 524 else 525 downsample->methods[ci] = h2v2_downsample; 526 } 527 } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 && 528 (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) { 529 smoothok = FALSE; 530 downsample->methods[ci] = int_downsample; 531 } else 532 ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL); 533 } 534 535 #ifdef INPUT_SMOOTHING_SUPPORTED 536 if (cinfo->smoothing_factor && !smoothok) 537 TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL); 538 #endif 539 } 540