Home | History | Annotate | Download | only in libopenjpeg20
      1 /*
      2  * The copyright in this software is being made available under the 2-clauses
      3  * BSD License, included below. This software may be subject to other third
      4  * party and contributor rights, including patent rights, and no such rights
      5  * are granted under this license.
      6  *
      7  * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
      8  * Copyright (c) 2002-2014, Professor Benoit Macq
      9  * Copyright (c) 2001-2003, David Janssens
     10  * Copyright (c) 2002-2003, Yannick Verschueren
     11  * Copyright (c) 2003-2007, Francois-Olivier Devaux
     12  * Copyright (c) 2003-2014, Antonin Descampe
     13  * Copyright (c) 2005, Herve Drolon, FreeImage Team
     14  * Copyright (c) 2007, Jonathan Ballard <dzonatas (at) dzonux.net>
     15  * Copyright (c) 2007, Callum Lerwick <seg (at) haxxed.com>
     16  * All rights reserved.
     17  *
     18  * Redistribution and use in source and binary forms, with or without
     19  * modification, are permitted provided that the following conditions
     20  * are met:
     21  * 1. Redistributions of source code must retain the above copyright
     22  *    notice, this list of conditions and the following disclaimer.
     23  * 2. Redistributions in binary form must reproduce the above copyright
     24  *    notice, this list of conditions and the following disclaimer in the
     25  *    documentation and/or other materials provided with the distribution.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
     28  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37  * POSSIBILITY OF SUCH DAMAGE.
     38  */
     39 
     40 #ifdef __SSE__
     41 #include <xmmintrin.h>
     42 #endif
     43 
     44 #include "opj_includes.h"
     45 
     46 /** @defgroup DWT DWT - Implementation of a discrete wavelet transform */
     47 /*@{*/
     48 
     49 /** @name Local data structures */
     50 /*@{*/
     51 
     52 typedef struct dwt_local {
     53 	OPJ_INT32* mem;
     54 	OPJ_SIZE_T mem_count;
     55 	OPJ_INT32 dn;
     56 	OPJ_INT32 sn;
     57 	OPJ_INT32 cas;
     58 } opj_dwt_t;
     59 
     60 typedef union {
     61 	OPJ_FLOAT32	f[4];
     62 } opj_v4_t;
     63 
     64 typedef struct v4dwt_local {
     65 	opj_v4_t*	wavelet ;
     66 	OPJ_INT32		dn ;
     67 	OPJ_INT32		sn ;
     68 	OPJ_INT32		cas ;
     69 } opj_v4dwt_t ;
     70 
     71 static const OPJ_FLOAT32 opj_dwt_alpha =  1.586134342f; /*  12994 */
     72 static const OPJ_FLOAT32 opj_dwt_beta  =  0.052980118f; /*    434 */
     73 static const OPJ_FLOAT32 opj_dwt_gamma = -0.882911075f; /*  -7233 */
     74 static const OPJ_FLOAT32 opj_dwt_delta = -0.443506852f; /*  -3633 */
     75 
     76 static const OPJ_FLOAT32 opj_K      = 1.230174105f; /*  10078 */
     77 static const OPJ_FLOAT32 opj_c13318 = 1.625732422f;
     78 
     79 /*@}*/
     80 
     81 /**
     82 Virtual function type for wavelet transform in 1-D
     83 */
     84 typedef void (*DWT1DFN)(opj_dwt_t* v);
     85 
     86 /** @name Local static functions */
     87 /*@{*/
     88 
     89 /**
     90 Forward lazy transform (horizontal)
     91 */
     92 static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas);
     93 /**
     94 Forward lazy transform (vertical)
     95 */
     96 static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas);
     97 /**
     98 Inverse lazy transform (horizontal)
     99 */
    100 static void opj_dwt_interleave_h(opj_dwt_t* h, OPJ_INT32 *a);
    101 /**
    102 Inverse lazy transform (vertical)
    103 */
    104 static void opj_dwt_interleave_v(opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x);
    105 /**
    106 Forward 5-3 wavelet transform in 1-D
    107 */
    108 static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas);
    109 /**
    110 Inverse 5-3 wavelet transform in 1-D
    111 */
    112 static void opj_dwt_decode_1(opj_dwt_t *v);
    113 static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas);
    114 /**
    115 Forward 9-7 wavelet transform in 1-D
    116 */
    117 static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas);
    118 /**
    119 Explicit calculation of the Quantization Stepsizes
    120 */
    121 static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_stepsize_t *bandno_stepsize);
    122 /**
    123 Inverse wavelet transform in 2-D.
    124 */
    125 static OPJ_BOOL opj_dwt_decode_tile(const opj_tcd_tilecomp_t* tilec, OPJ_UINT32 i, DWT1DFN fn);
    126 
    127 static OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec,
    128 										    void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32));
    129 
    130 static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT32 i);
    131 
    132 /* <summary>                             */
    133 /* Inverse 9-7 wavelet transform in 1-D. */
    134 /* </summary>                            */
    135 static void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt);
    136 
    137 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size);
    138 
    139 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read);
    140 
    141 #ifdef __SSE__
    142 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c);
    143 
    144 static void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c);
    145 
    146 #else
    147 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c);
    148 
    149 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c);
    150 
    151 #endif
    152 
    153 /*@}*/
    154 
    155 /*@}*/
    156 
    157 #define IDX_S(i) (i)*2
    158 #define IDX_D(i) 1 + (i)* 2
    159 #define UNDERFLOW_SN(i) ((i) >= sn&&sn>0)
    160 #define UNDERFLOW_DN(i) ((i) >= dn&&dn>0)
    161 #define OVERFLOW_S(i) (IDX_S(i) >= a_count)
    162 #define OVERFLOW_D(i) (IDX_D(i) >= a_count)
    163 
    164 #define OPJ_S(i) a[IDX_S(i)]
    165 #define OPJ_D(i) a[IDX_D(i)]
    166 #define OPJ_S_(i) ((i)<0 ? OPJ_S(0) : (UNDERFLOW_SN(i) ? OPJ_S(sn - 1) : OVERFLOW_S(i) ? OPJ_S(i - 1) : OPJ_S(i)))
    167 #define OPJ_D_(i) ((i)<0 ? OPJ_D(0) : (UNDERFLOW_DN(i) ? OPJ_D(dn - 1) : OVERFLOW_D(i) ? OPJ_D(i - 1) : OPJ_D(i)))
    168 /* new */
    169 #define OPJ_SS_(i) ((i)<0 ? OPJ_S(0) : (UNDERFLOW_DN(i) ? OPJ_S(dn - 1) : OVERFLOW_S(i) ? OPJ_S(i - 1) : OPJ_S(i)))
    170 #define OPJ_DD_(i) ((i)<0 ? OPJ_D(0) : (UNDERFLOW_SN(i) ? OPJ_D(sn - 1) : OVERFLOW_D(i) ? OPJ_D(i - 1) : OPJ_D(i)))
    171 
    172 /* <summary>                                                              */
    173 /* This table contains the norms of the 5-3 wavelets for different bands. */
    174 /* </summary>                                                             */
    175 static const OPJ_FLOAT64 opj_dwt_norms[4][10] = {
    176 	{1.000, 1.500, 2.750, 5.375, 10.68, 21.34, 42.67, 85.33, 170.7, 341.3},
    177 	{1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9},
    178 	{1.038, 1.592, 2.919, 5.703, 11.33, 22.64, 45.25, 90.48, 180.9},
    179 	{.7186, .9218, 1.586, 3.043, 6.019, 12.01, 24.00, 47.97, 95.93}
    180 };
    181 
    182 /* <summary>                                                              */
    183 /* This table contains the norms of the 9-7 wavelets for different bands. */
    184 /* </summary>                                                             */
    185 static const OPJ_FLOAT64 opj_dwt_norms_real[4][10] = {
    186 	{1.000, 1.965, 4.177, 8.403, 16.90, 33.84, 67.69, 135.3, 270.6, 540.9},
    187 	{2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0},
    188 	{2.022, 3.989, 8.355, 17.04, 34.27, 68.63, 137.3, 274.6, 549.0},
    189 	{2.080, 3.865, 8.307, 17.18, 34.71, 69.59, 139.3, 278.6, 557.2}
    190 };
    191 
    192 /*
    193 ==========================================================
    194    local functions
    195 ==========================================================
    196 */
    197 
    198 /* <summary>			                 */
    199 /* Forward lazy transform (horizontal).  */
    200 /* </summary>                            */
    201 static void opj_dwt_deinterleave_h(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) {
    202 	OPJ_INT32 i;
    203 	OPJ_INT32 * l_dest = b;
    204 	OPJ_INT32 * l_src = a+cas;
    205 
    206     for (i=0; i<sn; ++i) {
    207 		*l_dest++ = *l_src;
    208 		l_src += 2;
    209 	}
    210 
    211     l_dest = b + sn;
    212 	l_src = a + 1 - cas;
    213 
    214     for	(i=0; i<dn; ++i)  {
    215 		*l_dest++=*l_src;
    216 		l_src += 2;
    217 	}
    218 }
    219 
    220 /* <summary>                             */
    221 /* Forward lazy transform (vertical).    */
    222 /* </summary>                            */
    223 static void opj_dwt_deinterleave_v(OPJ_INT32 *a, OPJ_INT32 *b, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 x, OPJ_INT32 cas) {
    224     OPJ_INT32 i = sn;
    225 	OPJ_INT32 * l_dest = b;
    226 	OPJ_INT32 * l_src = a+cas;
    227 
    228     while (i--) {
    229 		*l_dest = *l_src;
    230 		l_dest += x;
    231 		l_src += 2;
    232 		} /* b[i*x]=a[2*i+cas]; */
    233 
    234 	l_dest = b + sn * x;
    235 	l_src = a + 1 - cas;
    236 
    237 	i = dn;
    238     while (i--) {
    239 		*l_dest = *l_src;
    240 		l_dest += x;
    241 		l_src += 2;
    242         } /*b[(sn+i)*x]=a[(2*i+1-cas)];*/
    243 }
    244 
    245 /* <summary>                             */
    246 /* Inverse lazy transform (horizontal).  */
    247 /* </summary>                            */
    248 static void opj_dwt_interleave_h(opj_dwt_t* h, OPJ_INT32 *a) {
    249     OPJ_INT32 *ai = a;
    250     OPJ_INT32 *bi = h->mem + h->cas;
    251     OPJ_INT32  i	= h->sn;
    252     while( i-- ) {
    253       *bi = *(ai++);
    254 	  bi += 2;
    255     }
    256     ai	= a + h->sn;
    257     bi	= h->mem + 1 - h->cas;
    258     i	= h->dn ;
    259     while( i-- ) {
    260       *bi = *(ai++);
    261 	  bi += 2;
    262     }
    263 }
    264 
    265 /* <summary>                             */
    266 /* Inverse lazy transform (vertical).    */
    267 /* </summary>                            */
    268 static void opj_dwt_interleave_v(opj_dwt_t* v, OPJ_INT32 *a, OPJ_INT32 x) {
    269     OPJ_INT32 *ai = a;
    270     OPJ_INT32 *bi = v->mem + v->cas;
    271     OPJ_INT32  i = v->sn;
    272     while( i-- ) {
    273       *bi = *ai;
    274 	  bi += 2;
    275 	  ai += x;
    276     }
    277     ai = a + (v->sn * x);
    278     bi = v->mem + 1 - v->cas;
    279     i = v->dn ;
    280     while( i-- ) {
    281       *bi = *ai;
    282 	  bi += 2;
    283 	  ai += x;
    284     }
    285 }
    286 
    287 
    288 /* <summary>                            */
    289 /* Forward 5-3 wavelet transform in 1-D. */
    290 /* </summary>                           */
    291 static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) {
    292 	OPJ_INT32 i;
    293 
    294 	if (!cas) {
    295 		if ((dn > 0) || (sn > 1)) {	/* NEW :  CASE ONE ELEMENT */
    296 			for (i = 0; i < dn; i++) OPJ_D(i) -= (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
    297 			for (i = 0; i < sn; i++) OPJ_S(i) += (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
    298 		}
    299 	} else {
    300 		if (!sn && dn == 1)		    /* NEW :  CASE ONE ELEMENT */
    301 			OPJ_S(0) *= 2;
    302 		else {
    303 			for (i = 0; i < dn; i++) OPJ_S(i) -= (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1;
    304 			for (i = 0; i < sn; i++) OPJ_D(i) += (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2;
    305 		}
    306 	}
    307 }
    308 
    309 /* <summary>                            */
    310 /* Inverse 5-3 wavelet transform in 1-D. */
    311 /* </summary>                           */
    312 static void opj_dwt_decode_1_(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) {
    313 	OPJ_INT32 i;
    314 
    315 	if (!cas) {
    316 		if ((dn > 0) || (sn > 1)) { /* NEW :  CASE ONE ELEMENT */
    317 			for (i = 0; i < sn; i++) OPJ_S(i) -= (OPJ_D_(i - 1) + OPJ_D_(i) + 2) >> 2;
    318 			for (i = 0; i < dn; i++) OPJ_D(i) += (OPJ_S_(i) + OPJ_S_(i + 1)) >> 1;
    319 		}
    320 	} else {
    321 		if (!sn  && dn == 1)          /* NEW :  CASE ONE ELEMENT */
    322 			OPJ_S(0) /= 2;
    323 		else {
    324 			for (i = 0; i < sn; i++) OPJ_D(i) -= (OPJ_SS_(i) + OPJ_SS_(i + 1) + 2) >> 2;
    325 			for (i = 0; i < dn; i++) OPJ_S(i) += (OPJ_DD_(i) + OPJ_DD_(i - 1)) >> 1;
    326 		}
    327 	}
    328 }
    329 
    330 /* <summary>                            */
    331 /* Inverse 5-3 wavelet transform in 1-D. */
    332 /* </summary>                           */
    333 static void opj_dwt_decode_1(opj_dwt_t *v) {
    334 	opj_dwt_decode_1_(v->mem, v->mem_count, v->dn, v->sn, v->cas);
    335 }
    336 
    337 /* <summary>                             */
    338 /* Forward 9-7 wavelet transform in 1-D. */
    339 /* </summary>                            */
    340 static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_SIZE_T a_count, OPJ_INT32 dn, OPJ_INT32 sn, OPJ_INT32 cas) {
    341 	OPJ_INT32 i;
    342 	if (!cas) {
    343 		if ((dn > 0) || (sn > 1)) {	/* NEW :  CASE ONE ELEMENT */
    344 			for (i = 0; i < dn; i++)
    345 				OPJ_D(i) -= opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 12993);
    346 			for (i = 0; i < sn; i++)
    347 				OPJ_S(i) -= opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 434);
    348 			for (i = 0; i < dn; i++)
    349 				OPJ_D(i) += opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 7233);
    350 			for (i = 0; i < sn; i++)
    351 				OPJ_S(i) += opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 3633);
    352 			for (i = 0; i < dn; i++)
    353 				OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 5038);	/*5038 */
    354 			for (i = 0; i < sn; i++)
    355 				OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 6659);	/*6660 */
    356 		}
    357 	} else {
    358 		if ((sn > 0) || (dn > 1)) {	/* NEW :  CASE ONE ELEMENT */
    359 			for (i = 0; i < dn; i++)
    360 				OPJ_S(i) -= opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 12993);
    361 			for (i = 0; i < sn; i++)
    362 				OPJ_D(i) -= opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 434);
    363 			for (i = 0; i < dn; i++)
    364 				OPJ_S(i) += opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 7233);
    365 			for (i = 0; i < sn; i++)
    366 				OPJ_D(i) += opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 3633);
    367 			for (i = 0; i < dn; i++)
    368 				OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 5038);	/*5038 */
    369 			for (i = 0; i < sn; i++)
    370 				OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 6659);	/*6660 */
    371 		}
    372 	}
    373 }
    374 
    375 static void opj_dwt_encode_stepsize(OPJ_INT32 stepsize, OPJ_INT32 numbps, opj_stepsize_t *bandno_stepsize) {
    376 	OPJ_INT32 p, n;
    377 	p = opj_int_floorlog2(stepsize) - 13;
    378 	n = 11 - opj_int_floorlog2(stepsize);
    379 	bandno_stepsize->mant = (n < 0 ? stepsize >> -n : stepsize << n) & 0x7ff;
    380 	bandno_stepsize->expn = numbps - p;
    381 }
    382 
    383 /*
    384 ==========================================================
    385    DWT interface
    386 ==========================================================
    387 */
    388 
    389 
    390 /* <summary>                            */
    391 /* Forward 5-3 wavelet transform in 2-D. */
    392 /* </summary>                           */
    393 static INLINE OPJ_BOOL opj_dwt_encode_procedure(const opj_tcd_tilecomp_t * tilec, void(*p_function)(OPJ_INT32 *, OPJ_SIZE_T, OPJ_INT32, OPJ_INT32, OPJ_INT32))
    394 {
    395 	OPJ_INT32 i, j, k;
    396 	OPJ_INT32 *a = 00;
    397 	OPJ_INT32 *aj = 00;
    398 	OPJ_INT32 *bj = 00;
    399 	OPJ_INT32 w, l;
    400 
    401 	OPJ_INT32 rw;			/* width of the resolution level computed   */
    402 	OPJ_INT32 rh;			/* height of the resolution level computed  */
    403 	OPJ_SIZE_T l_data_count;
    404 	OPJ_SIZE_T l_data_size;
    405 
    406 	opj_tcd_resolution_t * l_cur_res = 0;
    407 	opj_tcd_resolution_t * l_last_res = 0;
    408 
    409 	w = tilec->x1-tilec->x0;
    410 	l = (OPJ_INT32)tilec->numresolutions-1;
    411 	a = tilec->data;
    412 
    413 	l_cur_res = tilec->resolutions + l;
    414 	l_last_res = l_cur_res - 1;
    415 
    416 	l_data_count = opj_dwt_max_resolution(tilec->resolutions, tilec->numresolutions) * (OPJ_UINT32)sizeof(OPJ_INT32);
    417 	l_data_size = l_data_count * (OPJ_UINT32)sizeof(OPJ_INT32);
    418 	bj = (OPJ_INT32*)opj_malloc(l_data_size);
    419 	if (! bj) {
    420 		return OPJ_FALSE;
    421 	}
    422 	i = l;
    423 
    424 	while (i--) {
    425 		OPJ_INT32 rw1;		/* width of the resolution level once lower than computed one                                       */
    426 		OPJ_INT32 rh1;		/* height of the resolution level once lower than computed one                                      */
    427 		OPJ_INT32 cas_col;	/* 0 = non inversion on horizontal filtering 1 = inversion between low-pass and high-pass filtering */
    428 		OPJ_INT32 cas_row;	/* 0 = non inversion on vertical filtering 1 = inversion between low-pass and high-pass filtering   */
    429 		OPJ_INT32 dn, sn;
    430 
    431 		rw  = l_cur_res->x1 - l_cur_res->x0;
    432 		rh  = l_cur_res->y1 - l_cur_res->y0;
    433 		rw1 = l_last_res->x1 - l_last_res->x0;
    434 		rh1 = l_last_res->y1 - l_last_res->y0;
    435 
    436 		cas_row = l_cur_res->x0 & 1;
    437 		cas_col = l_cur_res->y0 & 1;
    438 
    439 		sn = rh1;
    440 		dn = rh - rh1;
    441 		for (j = 0; j < rw; ++j) {
    442 			aj = a + j;
    443 			for (k = 0; k < rh; ++k) {
    444 				bj[k] = aj[k*w];
    445 			}
    446 
    447 			(*p_function) (bj, l_data_count, dn, sn, cas_col);
    448 
    449 			opj_dwt_deinterleave_v(bj, aj, dn, sn, w, cas_col);
    450 		}
    451 
    452 		sn = rw1;
    453 		dn = rw - rw1;
    454 
    455 		for (j = 0; j < rh; j++) {
    456 			aj = a + j * w;
    457 			for (k = 0; k < rw; k++)  bj[k] = aj[k];
    458 			(*p_function) (bj, l_data_count, dn, sn, cas_row);
    459 			opj_dwt_deinterleave_h(bj, aj, dn, sn, cas_row);
    460 		}
    461 
    462 		l_cur_res = l_last_res;
    463 
    464 		--l_last_res;
    465 	}
    466 
    467 	opj_free(bj);
    468 	return OPJ_TRUE;
    469 }
    470 
    471 /* Forward 5-3 wavelet transform in 2-D. */
    472 /* </summary>                           */
    473 OPJ_BOOL opj_dwt_encode(opj_tcd_tilecomp_t * tilec)
    474 {
    475 	return opj_dwt_encode_procedure(tilec,opj_dwt_encode_1);
    476 }
    477 
    478 /* <summary>                            */
    479 /* Inverse 5-3 wavelet transform in 2-D. */
    480 /* </summary>                           */
    481 OPJ_BOOL opj_dwt_decode(opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres) {
    482 	return opj_dwt_decode_tile(tilec, numres, &opj_dwt_decode_1);
    483 }
    484 
    485 
    486 /* <summary>                          */
    487 /* Get gain of 5-3 wavelet transform. */
    488 /* </summary>                         */
    489 OPJ_UINT32 opj_dwt_getgain(OPJ_UINT32 orient) {
    490 	if (orient == 0)
    491 		return 0;
    492 	if (orient == 1 || orient == 2)
    493 		return 1;
    494 	return 2;
    495 }
    496 
    497 /* <summary>                */
    498 /* Get norm of 5-3 wavelet. */
    499 /* </summary>               */
    500 OPJ_FLOAT64 opj_dwt_getnorm(OPJ_UINT32 level, OPJ_UINT32 orient) {
    501 	return opj_dwt_norms[orient][level];
    502 }
    503 
    504 /* <summary>                             */
    505 /* Forward 9-7 wavelet transform in 2-D. */
    506 /* </summary>                            */
    507 OPJ_BOOL opj_dwt_encode_real(opj_tcd_tilecomp_t * tilec)
    508 {
    509 	return opj_dwt_encode_procedure(tilec,opj_dwt_encode_1_real);
    510 }
    511 
    512 /* <summary>                          */
    513 /* Get gain of 9-7 wavelet transform. */
    514 /* </summary>                         */
    515 OPJ_UINT32 opj_dwt_getgain_real(OPJ_UINT32 orient) {
    516 	(void)orient;
    517 	return 0;
    518 }
    519 
    520 /* <summary>                */
    521 /* Get norm of 9-7 wavelet. */
    522 /* </summary>               */
    523 OPJ_FLOAT64 opj_dwt_getnorm_real(OPJ_UINT32 level, OPJ_UINT32 orient) {
    524 	return opj_dwt_norms_real[orient][level];
    525 }
    526 
    527 void opj_dwt_calc_explicit_stepsizes(opj_tccp_t * tccp, OPJ_UINT32 prec) {
    528 	OPJ_UINT32 numbands, bandno;
    529 	numbands = 3 * tccp->numresolutions - 2;
    530 	for (bandno = 0; bandno < numbands; bandno++) {
    531 		OPJ_FLOAT64 stepsize;
    532 		OPJ_UINT32 resno, level, orient, gain;
    533 
    534 		resno = (bandno == 0) ? 0 : ((bandno - 1) / 3 + 1);
    535 		orient = (bandno == 0) ? 0 : ((bandno - 1) % 3 + 1);
    536 		level = tccp->numresolutions - 1 - resno;
    537 		gain = (tccp->qmfbid == 0) ? 0 : ((orient == 0) ? 0 : (((orient == 1) || (orient == 2)) ? 1 : 2));
    538 		if (tccp->qntsty == J2K_CCP_QNTSTY_NOQNT) {
    539 			stepsize = 1.0;
    540 		} else {
    541 			OPJ_FLOAT64 norm = opj_dwt_norms_real[orient][level];
    542 			stepsize = (1 << (gain)) / norm;
    543 		}
    544 		opj_dwt_encode_stepsize((OPJ_INT32) floor(stepsize * 8192.0), (OPJ_INT32)(prec + gain), &tccp->stepsizes[bandno]);
    545 	}
    546 }
    547 
    548 /* <summary>                             */
    549 /* Determine maximum computed resolution level for inverse wavelet transform */
    550 /* </summary>                            */
    551 static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* restrict r, OPJ_UINT32 i) {
    552 	OPJ_UINT32 mr	= 0;
    553 	OPJ_UINT32 w;
    554 	while( --i ) {
    555 		++r;
    556 		if( mr < ( w = (OPJ_UINT32)(r->x1 - r->x0) ) )
    557 			mr = w ;
    558 		if( mr < ( w = (OPJ_UINT32)(r->y1 - r->y0) ) )
    559 			mr = w ;
    560 	}
    561 	return mr ;
    562 }
    563 
    564 /* <summary>                            */
    565 /* Inverse wavelet transform in 2-D.     */
    566 /* </summary>                           */
    567 static OPJ_BOOL opj_dwt_decode_tile(const opj_tcd_tilecomp_t* tilec, OPJ_UINT32 numres, DWT1DFN dwt_1D) {
    568 	opj_dwt_t h;
    569 	opj_dwt_t v;
    570 
    571 	opj_tcd_resolution_t* tr = tilec->resolutions;
    572 
    573 	OPJ_UINT32 rw = (OPJ_UINT32)(tr->x1 - tr->x0);	/* width of the resolution level computed */
    574 	OPJ_UINT32 rh = (OPJ_UINT32)(tr->y1 - tr->y0);	/* height of the resolution level computed */
    575 
    576 	OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
    577 
    578 	h.mem_count = opj_dwt_max_resolution(tr, numres);
    579 	h.mem = (OPJ_INT32*)opj_aligned_malloc(h.mem_count * sizeof(OPJ_INT32));
    580 	if (! h.mem){
    581 		/* FIXME event manager error callback */
    582 		return OPJ_FALSE;
    583 	}
    584 
    585 	v.mem_count = h.mem_count;
    586 	v.mem = h.mem;
    587 
    588 	while( --numres) {
    589 		OPJ_INT32 * restrict tiledp = tilec->data;
    590 		OPJ_UINT32 j;
    591 
    592 		++tr;
    593 		h.sn = (OPJ_INT32)rw;
    594 		v.sn = (OPJ_INT32)rh;
    595 
    596 		rw = (OPJ_UINT32)(tr->x1 - tr->x0);
    597 		rh = (OPJ_UINT32)(tr->y1 - tr->y0);
    598 
    599 		h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn);
    600 		h.cas = tr->x0 % 2;
    601 
    602 		for(j = 0; j < rh; ++j) {
    603 			opj_dwt_interleave_h(&h, &tiledp[j*w]);
    604 			(dwt_1D)(&h);
    605 			memcpy(&tiledp[j*w], h.mem, rw * sizeof(OPJ_INT32));
    606 		}
    607 
    608 		v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn);
    609 		v.cas = tr->y0 % 2;
    610 
    611 		for(j = 0; j < rw; ++j){
    612 			OPJ_UINT32 k;
    613 			opj_dwt_interleave_v(&v, &tiledp[j], (OPJ_INT32)w);
    614 			(dwt_1D)(&v);
    615 			for(k = 0; k < rh; ++k) {
    616 				tiledp[k * w + j] = v.mem[k];
    617 			}
    618 		}
    619 	}
    620 	opj_aligned_free(h.mem);
    621 	return OPJ_TRUE;
    622 }
    623 
    624 static void opj_v4dwt_interleave_h(opj_v4dwt_t* restrict w, OPJ_FLOAT32* restrict a, OPJ_INT32 x, OPJ_INT32 size){
    625 	OPJ_FLOAT32* restrict bi = (OPJ_FLOAT32*) (w->wavelet + w->cas);
    626 	OPJ_INT32 count = w->sn;
    627 	OPJ_INT32 i, k;
    628 
    629 	for(k = 0; k < 2; ++k){
    630 		if ( count + 3 * x < size && ((size_t) a & 0x0f) == 0 && ((size_t) bi & 0x0f) == 0 && (x & 0x0f) == 0 ) {
    631 			/* Fast code path */
    632 			for(i = 0; i < count; ++i){
    633 				OPJ_INT32 j = i;
    634 				bi[i*8    ] = a[j];
    635 				j += x;
    636 				bi[i*8 + 1] = a[j];
    637 				j += x;
    638 				bi[i*8 + 2] = a[j];
    639 				j += x;
    640 				bi[i*8 + 3] = a[j];
    641 			}
    642 		}
    643 		else {
    644 			/* Slow code path */
    645 			for(i = 0; i < count; ++i){
    646 				OPJ_INT32 j = i;
    647 				bi[i*8    ] = a[j];
    648 				j += x;
    649 				if(j >= size) continue;
    650 				bi[i*8 + 1] = a[j];
    651 				j += x;
    652 				if(j >= size) continue;
    653 				bi[i*8 + 2] = a[j];
    654 				j += x;
    655 				if(j >= size) continue;
    656 				bi[i*8 + 3] = a[j]; /* This one*/
    657 			}
    658 		}
    659 
    660 		bi = (OPJ_FLOAT32*) (w->wavelet + 1 - w->cas);
    661 		a += w->sn;
    662 		size -= w->sn;
    663 		count = w->dn;
    664 	}
    665 }
    666 
    667 static void opj_v4dwt_interleave_v(opj_v4dwt_t* restrict v , OPJ_FLOAT32* restrict a , OPJ_INT32 x, OPJ_INT32 nb_elts_read){
    668 	opj_v4_t* restrict bi = v->wavelet + v->cas;
    669 	OPJ_INT32 i;
    670 
    671 	for(i = 0; i < v->sn; ++i){
    672 		memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
    673 	}
    674 
    675 	a += v->sn * x;
    676 	bi = v->wavelet + 1 - v->cas;
    677 
    678 	for(i = 0; i < v->dn; ++i){
    679 		memcpy(&bi[i*2], &a[i*x], (size_t)nb_elts_read * sizeof(OPJ_FLOAT32));
    680 	}
    681 }
    682 
    683 #ifdef __SSE__
    684 
    685 static void opj_v4dwt_decode_step1_sse(opj_v4_t* w, OPJ_INT32 count, const __m128 c){
    686 	__m128* restrict vw = (__m128*) w;
    687 	OPJ_INT32 i;
    688 	/* 4x unrolled loop */
    689 	for(i = 0; i < count >> 2; ++i){
    690 		*vw = _mm_mul_ps(*vw, c);
    691 		vw += 2;
    692 		*vw = _mm_mul_ps(*vw, c);
    693 		vw += 2;
    694 		*vw = _mm_mul_ps(*vw, c);
    695 		vw += 2;
    696 		*vw = _mm_mul_ps(*vw, c);
    697 		vw += 2;
    698 	}
    699 	count &= 3;
    700 	for(i = 0; i < count; ++i){
    701 		*vw = _mm_mul_ps(*vw, c);
    702 		vw += 2;
    703 	}
    704 }
    705 
    706 void opj_v4dwt_decode_step2_sse(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, __m128 c){
    707 	__m128* restrict vl = (__m128*) l;
    708 	__m128* restrict vw = (__m128*) w;
    709 	OPJ_INT32 i;
    710 	__m128 tmp1, tmp2, tmp3;
    711 	tmp1 = vl[0];
    712 	for(i = 0; i < m; ++i){
    713 		tmp2 = vw[-1];
    714 		tmp3 = vw[ 0];
    715 		vw[-1] = _mm_add_ps(tmp2, _mm_mul_ps(_mm_add_ps(tmp1, tmp3), c));
    716 		tmp1 = tmp3;
    717 		vw += 2;
    718 	}
    719 	vl = vw - 2;
    720 	if(m >= k){
    721 		return;
    722 	}
    723 	c = _mm_add_ps(c, c);
    724 	c = _mm_mul_ps(c, vl[0]);
    725 	for(; m < k; ++m){
    726 		__m128 tmp = vw[-1];
    727 		vw[-1] = _mm_add_ps(tmp, c);
    728 		vw += 2;
    729 	}
    730 }
    731 
    732 #else
    733 
    734 static void opj_v4dwt_decode_step1(opj_v4_t* w, OPJ_INT32 count, const OPJ_FLOAT32 c)
    735 {
    736 	OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w;
    737 	OPJ_INT32 i;
    738 	for(i = 0; i < count; ++i){
    739 		OPJ_FLOAT32 tmp1 = fw[i*8    ];
    740 		OPJ_FLOAT32 tmp2 = fw[i*8 + 1];
    741 		OPJ_FLOAT32 tmp3 = fw[i*8 + 2];
    742 		OPJ_FLOAT32 tmp4 = fw[i*8 + 3];
    743 		fw[i*8    ] = tmp1 * c;
    744 		fw[i*8 + 1] = tmp2 * c;
    745 		fw[i*8 + 2] = tmp3 * c;
    746 		fw[i*8 + 3] = tmp4 * c;
    747 	}
    748 }
    749 
    750 static void opj_v4dwt_decode_step2(opj_v4_t* l, opj_v4_t* w, OPJ_INT32 k, OPJ_INT32 m, OPJ_FLOAT32 c)
    751 {
    752 	OPJ_FLOAT32* restrict fl = (OPJ_FLOAT32*) l;
    753 	OPJ_FLOAT32* restrict fw = (OPJ_FLOAT32*) w;
    754 	OPJ_INT32 i;
    755 	for(i = 0; i < m; ++i){
    756 		OPJ_FLOAT32 tmp1_1 = fl[0];
    757 		OPJ_FLOAT32 tmp1_2 = fl[1];
    758 		OPJ_FLOAT32 tmp1_3 = fl[2];
    759 		OPJ_FLOAT32 tmp1_4 = fl[3];
    760 		OPJ_FLOAT32 tmp2_1 = fw[-4];
    761 		OPJ_FLOAT32 tmp2_2 = fw[-3];
    762 		OPJ_FLOAT32 tmp2_3 = fw[-2];
    763 		OPJ_FLOAT32 tmp2_4 = fw[-1];
    764 		OPJ_FLOAT32 tmp3_1 = fw[0];
    765 		OPJ_FLOAT32 tmp3_2 = fw[1];
    766 		OPJ_FLOAT32 tmp3_3 = fw[2];
    767 		OPJ_FLOAT32 tmp3_4 = fw[3];
    768 		fw[-4] = tmp2_1 + ((tmp1_1 + tmp3_1) * c);
    769 		fw[-3] = tmp2_2 + ((tmp1_2 + tmp3_2) * c);
    770 		fw[-2] = tmp2_3 + ((tmp1_3 + tmp3_3) * c);
    771 		fw[-1] = tmp2_4 + ((tmp1_4 + tmp3_4) * c);
    772 		fl = fw;
    773 		fw += 8;
    774 	}
    775 	if(m < k){
    776 		OPJ_FLOAT32 c1;
    777 		OPJ_FLOAT32 c2;
    778 		OPJ_FLOAT32 c3;
    779 		OPJ_FLOAT32 c4;
    780 		c += c;
    781 		c1 = fl[0] * c;
    782 		c2 = fl[1] * c;
    783 		c3 = fl[2] * c;
    784 		c4 = fl[3] * c;
    785 		for(; m < k; ++m){
    786 			OPJ_FLOAT32 tmp1 = fw[-4];
    787 			OPJ_FLOAT32 tmp2 = fw[-3];
    788 			OPJ_FLOAT32 tmp3 = fw[-2];
    789 			OPJ_FLOAT32 tmp4 = fw[-1];
    790 			fw[-4] = tmp1 + c1;
    791 			fw[-3] = tmp2 + c2;
    792 			fw[-2] = tmp3 + c3;
    793 			fw[-1] = tmp4 + c4;
    794 			fw += 8;
    795 		}
    796 	}
    797 }
    798 
    799 #endif
    800 
    801 /* <summary>                             */
    802 /* Inverse 9-7 wavelet transform in 1-D. */
    803 /* </summary>                            */
    804 void opj_v4dwt_decode(opj_v4dwt_t* restrict dwt)
    805 {
    806 	OPJ_INT32 a, b;
    807 	if(dwt->cas == 0) {
    808 		if(!((dwt->dn > 0) || (dwt->sn > 1))){
    809 			return;
    810 		}
    811 		a = 0;
    812 		b = 1;
    813 	}else{
    814 		if(!((dwt->sn > 0) || (dwt->dn > 1))) {
    815 			return;
    816 		}
    817 		a = 1;
    818 		b = 0;
    819 	}
    820 #ifdef __SSE__
    821 	opj_v4dwt_decode_step1_sse(dwt->wavelet+a, dwt->sn, _mm_set1_ps(opj_K));
    822 	opj_v4dwt_decode_step1_sse(dwt->wavelet+b, dwt->dn, _mm_set1_ps(opj_c13318));
    823 	opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_delta));
    824 	opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_gamma));
    825 	opj_v4dwt_decode_step2_sse(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), _mm_set1_ps(opj_dwt_beta));
    826 	opj_v4dwt_decode_step2_sse(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), _mm_set1_ps(opj_dwt_alpha));
    827 #else
    828 	opj_v4dwt_decode_step1(dwt->wavelet+a, dwt->sn, opj_K);
    829 	opj_v4dwt_decode_step1(dwt->wavelet+b, dwt->dn, opj_c13318);
    830 	opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_delta);
    831 	opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), opj_dwt_gamma);
    832 	opj_v4dwt_decode_step2(dwt->wavelet+b, dwt->wavelet+a+1, dwt->sn, opj_int_min(dwt->sn, dwt->dn-a), opj_dwt_beta);
    833 	opj_v4dwt_decode_step2(dwt->wavelet+a, dwt->wavelet+b+1, dwt->dn, opj_int_min(dwt->dn, dwt->sn-b), opj_dwt_alpha);
    834 #endif
    835 }
    836 
    837 
    838 /* <summary>                             */
    839 /* Inverse 9-7 wavelet transform in 2-D. */
    840 /* </summary>                            */
    841 OPJ_BOOL opj_dwt_decode_real(opj_tcd_tilecomp_t* restrict tilec, OPJ_UINT32 numres)
    842 {
    843 	opj_v4dwt_t h;
    844 	opj_v4dwt_t v;
    845 
    846 	opj_tcd_resolution_t* res = tilec->resolutions;
    847 
    848 	OPJ_UINT32 rw = (OPJ_UINT32)(res->x1 - res->x0);	/* width of the resolution level computed */
    849 	OPJ_UINT32 rh = (OPJ_UINT32)(res->y1 - res->y0);	/* height of the resolution level computed */
    850 
    851 	OPJ_UINT32 w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
    852 
    853 	h.wavelet = (opj_v4_t*) opj_aligned_malloc((opj_dwt_max_resolution(res, numres)+5) * sizeof(opj_v4_t));
    854 	if (!h.wavelet) {
    855 		/* FIXME event manager error callback */
    856 		return OPJ_FALSE;
    857 	}
    858 	v.wavelet = h.wavelet;
    859 
    860 	while( --numres) {
    861 		OPJ_FLOAT32 * restrict aj = (OPJ_FLOAT32*) tilec->data;
    862 		OPJ_UINT32 bufsize = (OPJ_UINT32)((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0));
    863 		OPJ_INT32 j;
    864 
    865 		h.sn = (OPJ_INT32)rw;
    866 		v.sn = (OPJ_INT32)rh;
    867 
    868 		++res;
    869 
    870 		rw = (OPJ_UINT32)(res->x1 - res->x0);	/* width of the resolution level computed */
    871 		rh = (OPJ_UINT32)(res->y1 - res->y0);	/* height of the resolution level computed */
    872 
    873 		h.dn = (OPJ_INT32)(rw - (OPJ_UINT32)h.sn);
    874 		h.cas = res->x0 % 2;
    875 
    876 		for(j = (OPJ_INT32)rh; j > 3; j -= 4) {
    877 			OPJ_INT32 k;
    878 			opj_v4dwt_interleave_h(&h, aj, (OPJ_INT32)w, (OPJ_INT32)bufsize);
    879 			opj_v4dwt_decode(&h);
    880 
    881 			for(k = (OPJ_INT32)rw; --k >= 0;){
    882 				aj[k               ] = h.wavelet[k].f[0];
    883 				aj[k+(OPJ_INT32)w  ] = h.wavelet[k].f[1];
    884 				aj[k+(OPJ_INT32)w*2] = h.wavelet[k].f[2];
    885 				aj[k+(OPJ_INT32)w*3] = h.wavelet[k].f[3];
    886 			}
    887 
    888 			aj += w*4;
    889 			bufsize -= w*4;
    890 		}
    891 
    892 		if (rh & 0x03) {
    893 			OPJ_INT32 k;
    894 			j = rh & 0x03;
    895 			opj_v4dwt_interleave_h(&h, aj, (OPJ_INT32)w, (OPJ_INT32)bufsize);
    896 			opj_v4dwt_decode(&h);
    897 			for(k = (OPJ_INT32)rw; --k >= 0;){
    898 				switch(j) {
    899 					case 3: aj[k+(OPJ_INT32)w*2] = h.wavelet[k].f[2];
    900 					case 2: aj[k+(OPJ_INT32)w  ] = h.wavelet[k].f[1];
    901 					case 1: aj[k               ] = h.wavelet[k].f[0];
    902 				}
    903 			}
    904 		}
    905 
    906 		v.dn = (OPJ_INT32)(rh - (OPJ_UINT32)v.sn);
    907 		v.cas = res->y0 % 2;
    908 
    909 		aj = (OPJ_FLOAT32*) tilec->data;
    910 		for(j = (OPJ_INT32)rw; j > 3; j -= 4){
    911 			OPJ_UINT32 k;
    912 
    913 			opj_v4dwt_interleave_v(&v, aj, (OPJ_INT32)w, 4);
    914 			opj_v4dwt_decode(&v);
    915 
    916 			for(k = 0; k < rh; ++k){
    917 				memcpy(&aj[k*w], &v.wavelet[k], 4 * sizeof(OPJ_FLOAT32));
    918 			}
    919 			aj += 4;
    920 		}
    921 
    922 		if (rw & 0x03){
    923 			OPJ_UINT32 k;
    924 
    925 			j = rw & 0x03;
    926 
    927 			opj_v4dwt_interleave_v(&v, aj, (OPJ_INT32)w, j);
    928 			opj_v4dwt_decode(&v);
    929 
    930 			for(k = 0; k < rh; ++k){
    931 				memcpy(&aj[k*w], &v.wavelet[k], (size_t)j * sizeof(OPJ_FLOAT32));
    932 			}
    933 		}
    934 	}
    935 
    936 	opj_aligned_free(h.wavelet);
    937 	return OPJ_TRUE;
    938 }
    939