1 /* 2 * The copyright in this software is being made available under the 2-clauses 3 * BSD License, included below. This software may be subject to other third 4 * party and contributor rights, including patent rights, and no such rights 5 * are granted under this license. 6 * 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium 8 * Copyright (c) 2002-2014, Professor Benoit Macq 9 * Copyright (c) 2001-2003, David Janssens 10 * Copyright (c) 2002-2003, Yannick Verschueren 11 * Copyright (c) 2003-2007, Francois-Olivier Devaux 12 * Copyright (c) 2003-2014, Antonin Descampe 13 * Copyright (c) 2005, Herve Drolon, FreeImage Team 14 * Copyright (c) 2007, Jonathan Ballard <dzonatas (at) dzonux.net> 15 * Copyright (c) 2007, Callum Lerwick <seg (at) haxxed.com> 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions 20 * are met: 21 * 1. Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * 2. Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' 28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 #ifdef __SSE__ 41 #include <xmmintrin.h> 42 #endif 43 44 #include "opj_includes.h" 45 46 /** @defgroup DWT DWT - Implementation of a discrete wavelet transform */ 47 /*@{*/ 48 49 /** @name Local data structures */ 50 /*@{*/ 51 52 typedef struct dwt_local { 53 OPJ_INT32* mem; 54 OPJ_SIZE_T mem_count; 55 OPJ_INT32 dn; 56 OPJ_INT32 sn; 57 OPJ_INT32 cas; 58 } opj_dwt_t; 59 60 typedef union { 61 OPJ_FLOAT32 f[4]; 62 } opj_v4_t; 63 64 typedef struct v4dwt_local { 65 opj_v4_t* wavelet ; 66 OPJ_INT32 dn ; 67 OPJ_INT32 sn ; 68 OPJ_INT32 cas ; 69 } opj_v4dwt_t ; 70 71 static const OPJ_FLOAT32 opj_dwt_alpha = 1.586134342f; /* 12994 */ 72 static const OPJ_FLOAT32 opj_dwt_beta = 0.052980118f; /* 434 */ 73 static const OPJ_FLOAT32 opj_dwt_gamma = -0.882911075f; /* -7233 */ 74 static const OPJ_FLOAT32 opj_dwt_delta = -0.443506852f; /* -3633 */ 75 76 static const OPJ_FLOAT32 opj_K = 1.230174105f; /* 10078 */ 77 static const OPJ_FLOAT32 opj_c13318 = 1.625732422f; 78 79 /*@}*/ 80 81 /** 82 Virtual function type for wavelet transform in 1-D 83 */ 84 typedef void (*DWT1DFN)(opj_dwt_t* v); 85 86 /** @name Local static functions */ 87 /*@{*/ 88 89 /** 90 Forward lazy transform (horizontal) 91 */ 92 static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); 93 /** 94 Forward lazy transform (vertical) 95 */ 96 static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas); 97 /** 98 Inverse lazy transform (horizontal) 99 */ 100 static void opj_dwt_interleave_h(opj_dwt_t* h, OPJ_INT32 *a); 101 /** 102 Inverse lazy transform (vertical) 103 */ 104 static void opj_dwt_interleave_v(opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x); 105 /** 106 Forward 5-3 wavelet transform in 1-D 107 */ 108 static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); 109 /** 110 Inverse 5-3 wavelet transform in 1-D 111 */ 112 static void opj_dwt_decode_1(opj_dwt_t *v); 113 static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); 114 /** 115 Forward 9-7 wavelet transform in 1-D 116 */ 117 static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas); 118 /** 119 Explicit calculation of the Quantization Stepsizes 120 */ 121 static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_stepsize_t *bandno_stepsize); 122 /** 123 Inverse wavelet transform in 2-D. 124 */ 125 static OPJ_BOOL opj_dwt_decode_tile(const opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i, DWT1DFN fn); 126 127 static OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec, 128 void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32)); 129 130 static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT32 i); 131 132 /* <summary> */ 133 /* Inverse 9-7 wavelet transform in 1-D. */ 134 /* </summary> */ 135 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt); 136 137 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size); 138 139 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read); 140 141 #ifdef __SSE__ 142 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c); 143 144 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c); 145 146 #else 147 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c); 148 149 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c); 150 151 #endif 152 153 /*@}*/ 154 155 /*@}*/ 156 157 #define IDX_S(i) (i)*2 158 #define IDX_D(i) 1 + (i)* 2 159 #define UNDERFLOW_SN(i) ((i) >= sn&&sn>0) 160 #define UNDERFLOW_DN(i) ((i) >= dn&&dn>0) 161 #define OVERFLOW_S(i) (IDX_S(i) >= a_count) 162 #define OVERFLOW_D(i) (IDX_D(i) >= a_count) 163 164 #define OPJ_S(i) a[IDX_S(i)] 165 #define OPJ_D(i) a[IDX_D(i)] 166 #define OPJ_S_(i) ((i)<0 ? OPJ_S(0) : (UNDERFLOW_SN(i) ? OPJ_S(sn - 1) : OVERFLOW_S(i) ? OPJ_S(i - 1) : OPJ_S(i))) 167 #define OPJ_D_(i) ((i)<0 ? OPJ_D(0) : (UNDERFLOW_DN(i) ? OPJ_D(dn - 1) : OVERFLOW_D(i) ? OPJ_D(i - 1) : OPJ_D(i))) 168 /* new */ 169 #define OPJ_SS_(i) ((i)<0 ? OPJ_S(0) : (UNDERFLOW_DN(i) ? OPJ_S(dn - 1) : OVERFLOW_S(i) ? OPJ_S(i - 1) : OPJ_S(i))) 170 #define OPJ_DD_(i) ((i)<0 ? OPJ_D(0) : (UNDERFLOW_SN(i) ? OPJ_D(sn - 1) : OVERFLOW_D(i) ? OPJ_D(i - 1) : OPJ_D(i))) 171 172 /* <summary> */ 173 /* This table contains the norms of the 5-3 wavelets for different bands. */ 174 /* </summary> */ 175 static const OPJ_FLOAT64 opj_dwt_norms[4][10] = { 176 {1.000, 1.500, 2.750, 5.375, 10.68, 21.34, 42.67, 85.33, 170.7, 341.3}, 177 {1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9}, 178 {1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9}, 179 {.7186, .9218, 1.586, 3.043, 6.019, 12.01, 24.00, 47.97, 95.93} 180 }; 181 182 /* <summary> */ 183 /* This table contains the norms of the 9-7 wavelets for different bands. */ 184 /* </summary> */ 185 static const OPJ_FLOAT64 opj_dwt_norms_real[4][10] = { 186 {1.000, 1.965, 4.177, 8.403, 16.90, 33.84, 67.69, 135.3, 270.6, 540.9}, 187 {2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0}, 188 {2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0}, 189 {2.080, 3.865, 8.307, 17.18, 34.71, 69.59, 139.3, 278.6, 557.2} 190 }; 191 192 /* 193 ========================================================== 194 local functions 195 ========================================================== 196 */ 197 198 /* <summary> */ 199 /* Forward lazy transform (horizontal). */ 200 /* </summary> */ 201 static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { 202 OPJ_INT32 i; 203 OPJ_INT32 * l_dest = b; 204 OPJ_INT32 * l_src = a+cas; 205 206 for (i=0; i<sn; ++i) { 207 *l_dest++ = *l_src; 208 l_src += 2; 209 } 210 211 l_dest = b + sn; 212 l_src = a + 1 - cas; 213 214 for (i=0; i<dn; ++i) { 215 *l_dest++=*l_src; 216 l_src += 2; 217 } 218 } 219 220 /* <summary> */ 221 /* Forward lazy transform (vertical). */ 222 /* </summary> */ 223 static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas) { 224 OPJ_INT32 i = sn; 225 OPJ_INT32 * l_dest = b; 226 OPJ_INT32 * l_src = a+cas; 227 228 while (i--) { 229 *l_dest = *l_src; 230 l_dest += x; 231 l_src += 2; 232 } /* b[i*x]=a[2*i+cas]; */ 233 234 l_dest = b + sn * x; 235 l_src = a + 1 - cas; 236 237 i = dn; 238 while (i--) { 239 *l_dest = *l_src; 240 l_dest += x; 241 l_src += 2; 242 } /*b[(sn+i)*x]=a[(2*i+1-cas)];*/ 243 } 244 245 /* <summary> */ 246 /* Inverse lazy transform (horizontal). */ 247 /* </summary> */ 248 static void opj_dwt_interleave_h(opj_dwt_t* h, OPJ_INT32 *a) { 249 OPJ_INT32 *ai = a; 250 OPJ_INT32 *bi = h->mem + h->cas; 251 OPJ_INT32 i = h->sn; 252 while( i-- ) { 253 *bi = *(ai++); 254 bi += 2; 255 } 256 ai = a + h->sn; 257 bi = h->mem + 1 - h->cas; 258 i = h->dn ; 259 while( i-- ) { 260 *bi = *(ai++); 261 bi += 2; 262 } 263 } 264 265 /* <summary> */ 266 /* Inverse lazy transform (vertical). */ 267 /* </summary> */ 268 static void opj_dwt_interleave_v(opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) { 269 OPJ_INT32 *ai = a; 270 OPJ_INT32 *bi = v->mem + v->cas; 271 OPJ_INT32 i = v->sn; 272 while( i-- ) { 273 *bi = *ai; 274 bi += 2; 275 ai += x; 276 } 277 ai = a + (v->sn * x); 278 bi = v->mem + 1 - v->cas; 279 i = v->dn ; 280 while( i-- ) { 281 *bi = *ai; 282 bi += 2; 283 ai += x; 284 } 285 } 286 287 288 /* <summary> */ 289 /* Forward 5-3 wavelet transform in 1-D. */ 290 /* </summary> */ 291 static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { 292 OPJ_INT32 i; 293 294 if (!cas) { 295 if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ 296 for (i = 0; i < dn; i++) OPJ_D(i) -= (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; 297 for (i = 0; i < sn; i++) OPJ_S(i) += (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; 298 } 299 } else { 300 if (!sn && dn == 1) /* NEW : CASE ONE ELEMENT */ 301 OPJ_S(0) *= 2; 302 else { 303 for (i = 0; i < dn; i++) OPJ_S(i) -= (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1; 304 for (i = 0; i < sn; i++) OPJ_D(i) += (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2; 305 } 306 } 307 } 308 309 /* <summary> */ 310 /* Inverse 5-3 wavelet transform in 1-D. */ 311 /* </summary> */ 312 static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { 313 OPJ_INT32 i; 314 315 if (!cas) { 316 if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ 317 for (i = 0; i < sn; i++) OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2; 318 for (i = 0; i < dn; i++) OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1; 319 } 320 } else { 321 if (!sn && dn == 1) /* NEW : CASE ONE ELEMENT */ 322 OPJ_S(0) /= 2; 323 else { 324 for (i = 0; i < sn; i++) OPJ_D(i) -= (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2; 325 for (i = 0; i < dn; i++) OPJ_S(i) += (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1; 326 } 327 } 328 } 329 330 /* <summary> */ 331 /* Inverse 5-3 wavelet transform in 1-D. */ 332 /* </summary> */ 333 static void opj_dwt_decode_1(opj_dwt_t *v) { 334 opj_dwt_decode_1_(v->mem, v->mem_count, v->dn, v->sn, v->cas); 335 } 336 337 /* <summary> */ 338 /* Forward 9-7 wavelet transform in 1-D. */ 339 /* </summary> */ 340 static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) { 341 OPJ_INT32 i; 342 if (!cas) { 343 if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */ 344 for (i = 0; i < dn; i++) 345 OPJ_D(i) -= opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 12993); 346 for (i = 0; i < sn; i++) 347 OPJ_S(i) -= opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 434); 348 for (i = 0; i < dn; i++) 349 OPJ_D(i) += opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 7233); 350 for (i = 0; i < sn; i++) 351 OPJ_S(i) += opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 3633); 352 for (i = 0; i < dn; i++) 353 OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 5038); /*5038 */ 354 for (i = 0; i < sn; i++) 355 OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 6659); /*6660 */ 356 } 357 } else { 358 if ((sn > 0) || (dn > 1)) { /* NEW : CASE ONE ELEMENT */ 359 for (i = 0; i < dn; i++) 360 OPJ_S(i) -= opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 12993); 361 for (i = 0; i < sn; i++) 362 OPJ_D(i) -= opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 434); 363 for (i = 0; i < dn; i++) 364 OPJ_S(i) += opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 7233); 365 for (i = 0; i < sn; i++) 366 OPJ_D(i) += opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 3633); 367 for (i = 0; i < dn; i++) 368 OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 5038); /*5038 */ 369 for (i = 0; i < sn; i++) 370 OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 6659); /*6660 */ 371 } 372 } 373 } 374 375 static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_stepsize_t *bandno_stepsize) { 376 OPJ_INT32 p, n; 377 p = opj_int_floorlog2(stepsize) - 13; 378 n = 11 - opj_int_floorlog2(stepsize); 379 bandno_stepsize->mant = (n < 0 ? stepsize >> -n : stepsize << n) & 0x7ff; 380 bandno_stepsize->expn = numbps - p; 381 } 382 383 /* 384 ========================================================== 385 DWT interface 386 ========================================================== 387 */ 388 389 390 /* <summary> */ 391 /* Forward 5-3 wavelet transform in 2-D. */ 392 /* </summary> */ 393 static INLINE OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec, void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32)) 394 { 395 OPJ_INT32 i, j, k; 396 OPJ_INT32 *a = 00; 397 OPJ_INT32 *aj = 00; 398 OPJ_INT32 *bj = 00; 399 OPJ_INT32 w, l; 400 401 OPJ_INT32 rw; /* width of the resolution level computed */ 402 OPJ_INT32 rh; /* height of the resolution level computed */ 403 OPJ_SIZE_T l_data_count; 404 OPJ_SIZE_T l_data_size; 405 406 opj_tcd_resolution_t * l_cur_res = 0; 407 opj_tcd_resolution_t * l_last_res = 0; 408 409 w = tilec->x1-tilec->x0; 410 l = (OPJ_INT32)tilec->numresolutions-1; 411 a = tilec->data; 412 413 l_cur_res = tilec->resolutions + l; 414 l_last_res = l_cur_res - 1; 415 416 l_data_count = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions) * (OPJ_UINT32)sizeof(OPJ_INT32); 417 l_data_size = l_data_count * (OPJ_UINT32)sizeof(OPJ_INT32); 418 bj = (OPJ_INT32*)opj_malloc(l_data_size); 419 if (! bj) { 420 return OPJ_FALSE; 421 } 422 i = l; 423 424 while (i--) { 425 OPJ_INT32 rw1; /* width of the resolution level once lower than computed one */ 426 OPJ_INT32 rh1; /* height of the resolution level once lower than computed one */ 427 OPJ_INT32 cas_col; /* 0 = non inversion on horizontal filtering 1 = inversion between low-pass and high-pass filtering */ 428 OPJ_INT32 cas_row; /* 0 = non inversion on vertical filtering 1 = inversion between low-pass and high-pass filtering */ 429 OPJ_INT32 dn, sn; 430 431 rw = l_cur_res->x1 - l_cur_res->x0; 432 rh = l_cur_res->y1 - l_cur_res->y0; 433 rw1 = l_last_res->x1 - l_last_res->x0; 434 rh1 = l_last_res->y1 - l_last_res->y0; 435 436 cas_row = l_cur_res->x0 & 1; 437 cas_col = l_cur_res->y0 & 1; 438 439 sn = rh1; 440 dn = rh - rh1; 441 for (j = 0; j < rw; ++j) { 442 aj = a + j; 443 for (k = 0; k < rh; ++k) { 444 bj[k] = aj[k*w]; 445 } 446 447 (*p_function) (bj, l_data_count, dn, sn, cas_col); 448 449 opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col); 450 } 451 452 sn = rw1; 453 dn = rw - rw1; 454 455 for (j = 0; j < rh; j++) { 456 aj = a + j * w; 457 for (k = 0; k < rw; k++) bj[k] = aj[k]; 458 (*p_function) (bj, l_data_count, dn, sn, cas_row); 459 opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row); 460 } 461 462 l_cur_res = l_last_res; 463 464 --l_last_res; 465 } 466 467 opj_free(bj); 468 return OPJ_TRUE; 469 } 470 471 /* Forward 5-3 wavelet transform in 2-D. */ 472 /* </summary> */ 473 OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec) 474 { 475 return opj_dwt_encode_procedure(tilec,opj_dwt_encode_1); 476 } 477 478 /* <summary> */ 479 /* Inverse 5-3 wavelet transform in 2-D. */ 480 /* </summary> */ 481 OPJ_BOOL opj_dwt_decode(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) { 482 return opj_dwt_decode_tile(tilec, numres, &opj_dwt_decode_1); 483 } 484 485 486 /* <summary> */ 487 /* Get gain of 5-3 wavelet transform. */ 488 /* </summary> */ 489 OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) { 490 if (orient == 0) 491 return 0; 492 if (orient == 1 || orient == 2) 493 return 1; 494 return 2; 495 } 496 497 /* <summary> */ 498 /* Get norm of 5-3 wavelet. */ 499 /* </summary> */ 500 OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient) { 501 return opj_dwt_norms[orient][level]; 502 } 503 504 /* <summary> */ 505 /* Forward 9-7 wavelet transform in 2-D. */ 506 /* </summary> */ 507 OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec) 508 { 509 return opj_dwt_encode_procedure(tilec,opj_dwt_encode_1_real); 510 } 511 512 /* <summary> */ 513 /* Get gain of 9-7 wavelet transform. */ 514 /* </summary> */ 515 OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient) { 516 (void)orient; 517 return 0; 518 } 519 520 /* <summary> */ 521 /* Get norm of 9-7 wavelet. */ 522 /* </summary> */ 523 OPJ_FLOAT64 opj_dwt_getnorm_real(OPJ_UINT32 level, OPJ_UINT32 orient) { 524 return opj_dwt_norms_real[orient][level]; 525 } 526 527 void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec) { 528 OPJ_UINT32 numbands, bandno; 529 numbands = 3 * tccp->numresolutions - 2; 530 for (bandno = 0; bandno < numbands; bandno++) { 531 OPJ_FLOAT64 stepsize; 532 OPJ_UINT32 resno, level, orient, gain; 533 534 resno = (bandno == 0) ? 0 : ((bandno - 1) / 3 + 1); 535 orient = (bandno == 0) ? 0 : ((bandno - 1) % 3 + 1); 536 level = tccp->numresolutions - 1 - resno; 537 gain = (tccp->qmfbid == 0) ? 0 : ((orient == 0) ? 0 : (((orient == 1) || (orient == 2)) ? 1 : 2)); 538 if (tccp->qntsty == J2K_CCP_QNTSTY_NOQNT) { 539 stepsize = 1.0; 540 } else { 541 OPJ_FLOAT64 norm = opj_dwt_norms_real[orient][level]; 542 stepsize = (1 << (gain)) / norm; 543 } 544 opj_dwt_encode_stepsize((OPJ_INT32) floor(stepsize * 8192.0), (OPJ_INT32)(prec + gain), &tccp->stepsizes[bandno]); 545 } 546 } 547 548 /* <summary> */ 549 /* Determine maximum computed resolution level for inverse wavelet transform */ 550 /* </summary> */ 551 static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT32 i) { 552 OPJ_UINT32 mr = 0; 553 OPJ_UINT32 w; 554 while( --i ) { 555 ++r; 556 if( mr < ( w = (OPJ_UINT32)(r->x1 - r->x0) ) ) 557 mr = w ; 558 if( mr < ( w = (OPJ_UINT32)(r->y1 - r->y0) ) ) 559 mr = w ; 560 } 561 return mr ; 562 } 563 564 /* <summary> */ 565 /* Inverse wavelet transform in 2-D. */ 566 /* </summary> */ 567 static OPJ_BOOL opj_dwt_decode_tile(const opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) { 568 opj_dwt_t h; 569 opj_dwt_t v; 570 571 opj_tcd_resolution_t* tr = tilec->resolutions; 572 573 OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 - tr->x0); /* width of the resolution level computed */ 574 OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0); /* height of the resolution level computed */ 575 576 OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); 577 578 h.mem_count = opj_dwt_max_resolution(tr, numres); 579 h.mem = (OPJ_INT32*)opj_aligned_malloc(h.mem_count * sizeof(OPJ_INT32)); 580 if (! h.mem){ 581 /* FIXME event manager error callback */ 582 return OPJ_FALSE; 583 } 584 585 v.mem_count = h.mem_count; 586 v.mem = h.mem; 587 588 while( --numres) { 589 OPJ_INT32 * restrict tiledp = tilec->data; 590 OPJ_UINT32 j; 591 592 ++tr; 593 h.sn = (OPJ_INT32)rw; 594 v.sn = (OPJ_INT32)rh; 595 596 rw = (OPJ_UINT32)(tr->x1 - tr->x0); 597 rh = (OPJ_UINT32)(tr->y1 - tr->y0); 598 599 h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn); 600 h.cas = tr->x0 % 2; 601 602 for(j = 0; j < rh; ++j) { 603 opj_dwt_interleave_h(&h, &tiledp[j*w]); 604 (dwt_1D)(&h); 605 memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32)); 606 } 607 608 v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn); 609 v.cas = tr->y0 % 2; 610 611 for(j = 0; j < rw; ++j){ 612 OPJ_UINT32 k; 613 opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w); 614 (dwt_1D)(&v); 615 for(k = 0; k < rh; ++k) { 616 tiledp[k * w + j] = v.mem[k]; 617 } 618 } 619 } 620 opj_aligned_free(h.mem); 621 return OPJ_TRUE; 622 } 623 624 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size){ 625 OPJ_FLOAT32* restrict bi = (OPJ_FLOAT32*) (w->wavelet + w->cas); 626 OPJ_INT32 count = w->sn; 627 OPJ_INT32 i, k; 628 629 for(k = 0; k < 2; ++k){ 630 if ( count + 3 * x < size && ((size_t) a & 0x0f) == 0 && ((size_t) bi & 0x0f) == 0 && (x & 0x0f) == 0 ) { 631 /* Fast code path */ 632 for(i = 0; i < count; ++i){ 633 OPJ_INT32 j = i; 634 bi[i*8 ] = a[j]; 635 j += x; 636 bi[i*8 + 1] = a[j]; 637 j += x; 638 bi[i*8 + 2] = a[j]; 639 j += x; 640 bi[i*8 + 3] = a[j]; 641 } 642 } 643 else { 644 /* Slow code path */ 645 for(i = 0; i < count; ++i){ 646 OPJ_INT32 j = i; 647 bi[i*8 ] = a[j]; 648 j += x; 649 if(j >= size) continue; 650 bi[i*8 + 1] = a[j]; 651 j += x; 652 if(j >= size) continue; 653 bi[i*8 + 2] = a[j]; 654 j += x; 655 if(j >= size) continue; 656 bi[i*8 + 3] = a[j]; /* This one*/ 657 } 658 } 659 660 bi = (OPJ_FLOAT32*) (w->wavelet + 1 - w->cas); 661 a += w->sn; 662 size -= w->sn; 663 count = w->dn; 664 } 665 } 666 667 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read){ 668 opj_v4_t* restrict bi = v->wavelet + v->cas; 669 OPJ_INT32 i; 670 671 for(i = 0; i < v->sn; ++i){ 672 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); 673 } 674 675 a += v->sn * x; 676 bi = v->wavelet + 1 - v->cas; 677 678 for(i = 0; i < v->dn; ++i){ 679 memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32)); 680 } 681 } 682 683 #ifdef __SSE__ 684 685 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){ 686 __m128* restrict vw = (__m128*) w; 687 OPJ_INT32 i; 688 /* 4x unrolled loop */ 689 for(i = 0; i < count >> 2; ++i){ 690 *vw = _mm_mul_ps(*vw, c); 691 vw += 2; 692 *vw = _mm_mul_ps(*vw, c); 693 vw += 2; 694 *vw = _mm_mul_ps(*vw, c); 695 vw += 2; 696 *vw = _mm_mul_ps(*vw, c); 697 vw += 2; 698 } 699 count &= 3; 700 for(i = 0; i < count; ++i){ 701 *vw = _mm_mul_ps(*vw, c); 702 vw += 2; 703 } 704 } 705 706 void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c){ 707 __m128* restrict vl = (__m128*) l; 708 __m128* restrict vw = (__m128*) w; 709 OPJ_INT32 i; 710 __m128 tmp1, tmp2, tmp3; 711 tmp1 = vl[0]; 712 for(i = 0; i < m; ++i){ 713 tmp2 = vw[-1]; 714 tmp3 = vw[ 0]; 715 vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c)); 716 tmp1 = tmp3; 717 vw += 2; 718 } 719 vl = vw - 2; 720 if(m >= k){ 721 return; 722 } 723 c = _mm_add_ps(c, c); 724 c = _mm_mul_ps(c, vl[0]); 725 for(; m < k; ++m){ 726 __m128 tmp = vw[-1]; 727 vw[-1] = _mm_add_ps(tmp, c); 728 vw += 2; 729 } 730 } 731 732 #else 733 734 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c) 735 { 736 OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w; 737 OPJ_INT32 i; 738 for(i = 0; i < count; ++i){ 739 OPJ_FLOAT32 tmp1 = fw[i*8 ]; 740 OPJ_FLOAT32 tmp2 = fw[i*8 + 1]; 741 OPJ_FLOAT32 tmp3 = fw[i*8 + 2]; 742 OPJ_FLOAT32 tmp4 = fw[i*8 + 3]; 743 fw[i*8 ] = tmp1 * c; 744 fw[i*8 + 1] = tmp2 * c; 745 fw[i*8 + 2] = tmp3 * c; 746 fw[i*8 + 3] = tmp4 * c; 747 } 748 } 749 750 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c) 751 { 752 OPJ_FLOAT32* restrict fl = (OPJ_FLOAT32*) l; 753 OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w; 754 OPJ_INT32 i; 755 for(i = 0; i < m; ++i){ 756 OPJ_FLOAT32 tmp1_1 = fl[0]; 757 OPJ_FLOAT32 tmp1_2 = fl[1]; 758 OPJ_FLOAT32 tmp1_3 = fl[2]; 759 OPJ_FLOAT32 tmp1_4 = fl[3]; 760 OPJ_FLOAT32 tmp2_1 = fw[-4]; 761 OPJ_FLOAT32 tmp2_2 = fw[-3]; 762 OPJ_FLOAT32 tmp2_3 = fw[-2]; 763 OPJ_FLOAT32 tmp2_4 = fw[-1]; 764 OPJ_FLOAT32 tmp3_1 = fw[0]; 765 OPJ_FLOAT32 tmp3_2 = fw[1]; 766 OPJ_FLOAT32 tmp3_3 = fw[2]; 767 OPJ_FLOAT32 tmp3_4 = fw[3]; 768 fw[-4] = tmp2_1 + ((tmp1_1 + tmp3_1) * c); 769 fw[-3] = tmp2_2 + ((tmp1_2 + tmp3_2) * c); 770 fw[-2] = tmp2_3 + ((tmp1_3 + tmp3_3) * c); 771 fw[-1] = tmp2_4 + ((tmp1_4 + tmp3_4) * c); 772 fl = fw; 773 fw += 8; 774 } 775 if(m < k){ 776 OPJ_FLOAT32 c1; 777 OPJ_FLOAT32 c2; 778 OPJ_FLOAT32 c3; 779 OPJ_FLOAT32 c4; 780 c += c; 781 c1 = fl[0] * c; 782 c2 = fl[1] * c; 783 c3 = fl[2] * c; 784 c4 = fl[3] * c; 785 for(; m < k; ++m){ 786 OPJ_FLOAT32 tmp1 = fw[-4]; 787 OPJ_FLOAT32 tmp2 = fw[-3]; 788 OPJ_FLOAT32 tmp3 = fw[-2]; 789 OPJ_FLOAT32 tmp4 = fw[-1]; 790 fw[-4] = tmp1 + c1; 791 fw[-3] = tmp2 + c2; 792 fw[-2] = tmp3 + c3; 793 fw[-1] = tmp4 + c4; 794 fw += 8; 795 } 796 } 797 } 798 799 #endif 800 801 /* <summary> */ 802 /* Inverse 9-7 wavelet transform in 1-D. */ 803 /* </summary> */ 804 void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt) 805 { 806 OPJ_INT32 a, b; 807 if(dwt->cas == 0) { 808 if(!((dwt->dn > 0) || (dwt->sn > 1))){ 809 return; 810 } 811 a = 0; 812 b = 1; 813 }else{ 814 if(!((dwt->sn > 0) || (dwt->dn > 1))) { 815 return; 816 } 817 a = 1; 818 b = 0; 819 } 820 #ifdef __SSE__ 821 opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K)); 822 opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318)); 823 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta)); 824 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma)); 825 opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta)); 826 opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha)); 827 #else 828 opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K); 829 opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318); 830 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_delta); 831 opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), opj_dwt_gamma); 832 opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_beta); 833 opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), opj_dwt_alpha); 834 #endif 835 } 836 837 838 /* <summary> */ 839 /* Inverse 9-7 wavelet transform in 2-D. */ 840 /* </summary> */ 841 OPJ_BOOL opj_dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, OPJ_UINT32 numres) 842 { 843 opj_v4dwt_t h; 844 opj_v4dwt_t v; 845 846 opj_tcd_resolution_t* res = tilec->resolutions; 847 848 OPJ_UINT32 rw = (OPJ_UINT32)(res->x1 - res->x0); /* width of the resolution level computed */ 849 OPJ_UINT32 rh = (OPJ_UINT32)(res->y1 - res->y0); /* height of the resolution level computed */ 850 851 OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0); 852 853 h.wavelet = (opj_v4_t*) opj_aligned_malloc((opj_dwt_max_resolution(res, numres)+5) * sizeof(opj_v4_t)); 854 if (!h.wavelet) { 855 /* FIXME event manager error callback */ 856 return OPJ_FALSE; 857 } 858 v.wavelet = h.wavelet; 859 860 while( --numres) { 861 OPJ_FLOAT32 * restrict aj = (OPJ_FLOAT32*) tilec->data; 862 OPJ_UINT32 bufsize = (OPJ_UINT32)((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0)); 863 OPJ_INT32 j; 864 865 h.sn = (OPJ_INT32)rw; 866 v.sn = (OPJ_INT32)rh; 867 868 ++res; 869 870 rw = (OPJ_UINT32)(res->x1 - res->x0); /* width of the resolution level computed */ 871 rh = (OPJ_UINT32)(res->y1 - res->y0); /* height of the resolution level computed */ 872 873 h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn); 874 h.cas = res->x0 % 2; 875 876 for(j = (OPJ_INT32)rh; j > 3; j -= 4) { 877 OPJ_INT32 k; 878 opj_v4dwt_interleave_h(&h, aj, (OPJ_INT32)w, (OPJ_INT32)bufsize); 879 opj_v4dwt_decode(&h); 880 881 for(k = (OPJ_INT32)rw; --k >= 0;){ 882 aj[k ] = h.wavelet[k].f[0]; 883 aj[k+(OPJ_INT32)w ] = h.wavelet[k].f[1]; 884 aj[k+(OPJ_INT32)w*2] = h.wavelet[k].f[2]; 885 aj[k+(OPJ_INT32)w*3] = h.wavelet[k].f[3]; 886 } 887 888 aj += w*4; 889 bufsize -= w*4; 890 } 891 892 if (rh & 0x03) { 893 OPJ_INT32 k; 894 j = rh & 0x03; 895 opj_v4dwt_interleave_h(&h, aj, (OPJ_INT32)w, (OPJ_INT32)bufsize); 896 opj_v4dwt_decode(&h); 897 for(k = (OPJ_INT32)rw; --k >= 0;){ 898 switch(j) { 899 case 3: aj[k+(OPJ_INT32)w*2] = h.wavelet[k].f[2]; 900 case 2: aj[k+(OPJ_INT32)w ] = h.wavelet[k].f[1]; 901 case 1: aj[k ] = h.wavelet[k].f[0]; 902 } 903 } 904 } 905 906 v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn); 907 v.cas = res->y0 % 2; 908 909 aj = (OPJ_FLOAT32*) tilec->data; 910 for(j = (OPJ_INT32)rw; j > 3; j -= 4){ 911 OPJ_UINT32 k; 912 913 opj_v4dwt_interleave_v(&v, aj, (OPJ_INT32)w, 4); 914 opj_v4dwt_decode(&v); 915 916 for(k = 0; k < rh; ++k){ 917 memcpy(&aj[k*w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32)); 918 } 919 aj += 4; 920 } 921 922 if (rw & 0x03){ 923 OPJ_UINT32 k; 924 925 j = rw & 0x03; 926 927 opj_v4dwt_interleave_v(&v, aj, (OPJ_INT32)w, j); 928 opj_v4dwt_decode(&v); 929 930 for(k = 0; k < rh; ++k){ 931 memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32)); 932 } 933 } 934 } 935 936 opj_aligned_free(h.wavelet); 937 return OPJ_TRUE; 938 } 939