Home | History | Annotate | Download | only in libopenjpeg20
      1 /*
      2  * The copyright in this software is being made available under the 2-clauses
      3  * BSD License, included below. This software may be subject to other third
      4  * party and contributor rights, including patent rights, and no such rights
      5  * are granted under this license.
      6  *
      7  * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
      8  * Copyright (c) 2002-2014, Professor Benoit Macq
      9  * Copyright (c) 2001-2003, David Janssens
     10  * Copyright (c) 2002-2003, Yannick Verschueren
     11  * Copyright (c) 2003-2007, Francois-Olivier Devaux
     12  * Copyright (c) 2003-2014, Antonin Descampe
     13  * Copyright (c) 2005, Herve Drolon, FreeImage Team
     14  * Copyright (c) 2008, 2011-2012, Centre National d'Etudes Spatiales (CNES), FR
     15  * Copyright (c) 2012, CS Systemes d'Information, France
     16  * All rights reserved.
     17  *
     18  * Redistribution and use in source and binary forms, with or without
     19  * modification, are permitted provided that the following conditions
     20  * are met:
     21  * 1. Redistributions of source code must retain the above copyright
     22  *    notice, this list of conditions and the following disclaimer.
     23  * 2. Redistributions in binary form must reproduce the above copyright
     24  *    notice, this list of conditions and the following disclaimer in the
     25  *    documentation and/or other materials provided with the distribution.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
     28  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37  * POSSIBILITY OF SUCH DAMAGE.
     38  */
     39 
     40 #ifdef __SSE__
     41 #include <xmmintrin.h>
     42 #endif
     43 #ifdef __SSE2__
     44 #include <emmintrin.h>
     45 #endif
     46 #ifdef __SSE4_1__
     47 #include <smmintrin.h>
     48 #endif
     49 
     50 #include "opj_includes.h"
     51 
     52 /* <summary> */
     53 /* This table contains the norms of the basis function of the reversible MCT. */
     54 /* </summary> */
     55 static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 };
     56 
     57 /* <summary> */
     58 /* This table contains the norms of the basis function of the irreversible MCT. */
     59 /* </summary> */
     60 static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 };
     61 
     62 const OPJ_FLOAT64 * opj_mct_get_mct_norms ()
     63 {
     64 	return opj_mct_norms;
     65 }
     66 
     67 const OPJ_FLOAT64 * opj_mct_get_mct_norms_real ()
     68 {
     69 	return opj_mct_norms_real;
     70 }
     71 
     72 /* <summary> */
     73 /* Foward reversible MCT. */
     74 /* </summary> */
     75 #ifdef __SSE2__
     76 void opj_mct_encode(
     77 		OPJ_INT32* restrict c0,
     78 		OPJ_INT32* restrict c1,
     79 		OPJ_INT32* restrict c2,
     80 		OPJ_UINT32 n)
     81 {
     82 	OPJ_SIZE_T i;
     83 	const OPJ_SIZE_T len = n;
     84 
     85 	for(i = 0; i < (len & ~3U); i += 4) {
     86 		__m128i y, u, v;
     87 		__m128i r = _mm_load_si128((const __m128i *)&(c0[i]));
     88 		__m128i g = _mm_load_si128((const __m128i *)&(c1[i]));
     89 		__m128i b = _mm_load_si128((const __m128i *)&(c2[i]));
     90 		y = _mm_add_epi32(g, g);
     91 		y = _mm_add_epi32(y, b);
     92 		y = _mm_add_epi32(y, r);
     93 		y = _mm_srai_epi32(y, 2);
     94 		u = _mm_sub_epi32(b, g);
     95 		v = _mm_sub_epi32(r, g);
     96 		_mm_store_si128((__m128i *)&(c0[i]), y);
     97 		_mm_store_si128((__m128i *)&(c1[i]), u);
     98 		_mm_store_si128((__m128i *)&(c2[i]), v);
     99 	}
    100 
    101 	for(; i < len; ++i) {
    102 		OPJ_INT32 r = c0[i];
    103 		OPJ_INT32 g = c1[i];
    104 		OPJ_INT32 b = c2[i];
    105 		OPJ_INT32 y = (r + (g * 2) + b) >> 2;
    106 		OPJ_INT32 u = b - g;
    107 		OPJ_INT32 v = r - g;
    108 		c0[i] = y;
    109 		c1[i] = u;
    110 		c2[i] = v;
    111 	}
    112 }
    113 #else
    114 void opj_mct_encode(
    115 		OPJ_INT32* restrict c0,
    116 		OPJ_INT32* restrict c1,
    117 		OPJ_INT32* restrict c2,
    118 		OPJ_UINT32 n)
    119 {
    120 	OPJ_SIZE_T i;
    121 	const OPJ_SIZE_T len = n;
    122 
    123 	for(i = 0; i < len; ++i) {
    124 		OPJ_INT32 r = c0[i];
    125 		OPJ_INT32 g = c1[i];
    126 		OPJ_INT32 b = c2[i];
    127 		OPJ_INT32 y = (r + (g * 2) + b) >> 2;
    128 		OPJ_INT32 u = b - g;
    129 		OPJ_INT32 v = r - g;
    130 		c0[i] = y;
    131 		c1[i] = u;
    132 		c2[i] = v;
    133 	}
    134 }
    135 #endif
    136 
    137 /* <summary> */
    138 /* Inverse reversible MCT. */
    139 /* </summary> */
    140 #ifdef __SSE2__
    141 void opj_mct_decode(
    142 		OPJ_INT32* restrict c0,
    143 		OPJ_INT32* restrict c1,
    144 		OPJ_INT32* restrict c2,
    145 		OPJ_UINT32 n)
    146 {
    147 	OPJ_SIZE_T i;
    148 	const OPJ_SIZE_T len = n;
    149 
    150 	for(i = 0; i < (len & ~3U); i += 4) {
    151 		__m128i r, g, b;
    152 		__m128i y = _mm_load_si128((const __m128i *)&(c0[i]));
    153 		__m128i u = _mm_load_si128((const __m128i *)&(c1[i]));
    154 		__m128i v = _mm_load_si128((const __m128i *)&(c2[i]));
    155 		g = y;
    156 		g = _mm_sub_epi32(g, _mm_srai_epi32(_mm_add_epi32(u, v), 2));
    157 		r = _mm_add_epi32(v, g);
    158 		b = _mm_add_epi32(u, g);
    159 		_mm_store_si128((__m128i *)&(c0[i]), r);
    160 		_mm_store_si128((__m128i *)&(c1[i]), g);
    161 		_mm_store_si128((__m128i *)&(c2[i]), b);
    162 	}
    163 	for (; i < len; ++i) {
    164 		OPJ_INT32 y = c0[i];
    165 		OPJ_INT32 u = c1[i];
    166 		OPJ_INT32 v = c2[i];
    167 		OPJ_INT32 g = y - ((u + v) >> 2);
    168 		OPJ_INT32 r = v + g;
    169 		OPJ_INT32 b = u + g;
    170 		c0[i] = r;
    171 		c1[i] = g;
    172 		c2[i] = b;
    173 	}
    174 }
    175 #else
    176 void opj_mct_decode(
    177 		OPJ_INT32* restrict c0,
    178 		OPJ_INT32* restrict c1,
    179 		OPJ_INT32* restrict c2,
    180 		OPJ_UINT32 n)
    181 {
    182 	OPJ_UINT32 i;
    183 	for (i = 0; i < n; ++i) {
    184 		OPJ_INT32 y = c0[i];
    185 		OPJ_INT32 u = c1[i];
    186 		OPJ_INT32 v = c2[i];
    187 		OPJ_INT32 g = y - ((u + v) >> 2);
    188 		OPJ_INT32 r = v + g;
    189 		OPJ_INT32 b = u + g;
    190 		c0[i] = r;
    191 		c1[i] = g;
    192 		c2[i] = b;
    193 	}
    194 }
    195 #endif
    196 
    197 /* <summary> */
    198 /* Get norm of basis function of reversible MCT. */
    199 /* </summary> */
    200 OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) {
    201 	return opj_mct_norms[compno];
    202 }
    203 
    204 /* <summary> */
    205 /* Foward irreversible MCT. */
    206 /* </summary> */
    207 #ifdef __SSE4_1__
    208 void opj_mct_encode_real(
    209 												 OPJ_INT32* restrict c0,
    210 												 OPJ_INT32* restrict c1,
    211 												 OPJ_INT32* restrict c2,
    212 												 OPJ_UINT32 n)
    213 {
    214 	OPJ_SIZE_T i;
    215 	const OPJ_SIZE_T len = n;
    216 
    217 	const __m128i ry = _mm_set1_epi32(2449);
    218 	const __m128i gy = _mm_set1_epi32(4809);
    219 	const __m128i by = _mm_set1_epi32(934);
    220 	const __m128i ru = _mm_set1_epi32(1382);
    221 	const __m128i gu = _mm_set1_epi32(2714);
    222 	/* const __m128i bu = _mm_set1_epi32(4096); */
    223 	/* const __m128i rv = _mm_set1_epi32(4096); */
    224 	const __m128i gv = _mm_set1_epi32(3430);
    225 	const __m128i bv = _mm_set1_epi32(666);
    226 	const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096), _MM_SHUFFLE(1, 0, 1, 0));
    227 
    228 	for(i = 0; i < (len & ~3U); i += 4) {
    229 		__m128i lo, hi;
    230 		__m128i y, u, v;
    231 		__m128i r = _mm_load_si128((const __m128i *)&(c0[i]));
    232 		__m128i g = _mm_load_si128((const __m128i *)&(c1[i]));
    233 		__m128i b = _mm_load_si128((const __m128i *)&(c2[i]));
    234 
    235 		lo = r;
    236 		hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
    237 		lo = _mm_mul_epi32(lo, ry);
    238 		hi = _mm_mul_epi32(hi, ry);
    239 		lo = _mm_add_epi64(lo, mulround);
    240 		hi = _mm_add_epi64(hi, mulround);
    241 		lo = _mm_srli_epi64(lo, 13);
    242 		hi = _mm_slli_epi64(hi, 32-13);
    243 		y = _mm_blend_epi16(lo, hi, 0xCC);
    244 
    245 		lo = g;
    246 		hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
    247 		lo = _mm_mul_epi32(lo, gy);
    248 		hi = _mm_mul_epi32(hi, gy);
    249 		lo = _mm_add_epi64(lo, mulround);
    250 		hi = _mm_add_epi64(hi, mulround);
    251 		lo = _mm_srli_epi64(lo, 13);
    252 		hi = _mm_slli_epi64(hi, 32-13);
    253 		y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
    254 
    255 		lo = b;
    256 		hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
    257 		lo = _mm_mul_epi32(lo, by);
    258 		hi = _mm_mul_epi32(hi, by);
    259 		lo = _mm_add_epi64(lo, mulround);
    260 		hi = _mm_add_epi64(hi, mulround);
    261 		lo = _mm_srli_epi64(lo, 13);
    262 		hi = _mm_slli_epi64(hi, 32-13);
    263 		y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
    264 		_mm_store_si128((__m128i *)&(c0[i]), y);
    265 
    266 		/*lo = b;
    267 		hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
    268 		lo = _mm_mul_epi32(lo, mulround);
    269 		hi = _mm_mul_epi32(hi, mulround);*/
    270 		lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0)));
    271 		hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1)));
    272 		lo = _mm_slli_epi64(lo, 12);
    273 		hi = _mm_slli_epi64(hi, 12);
    274 		lo = _mm_add_epi64(lo, mulround);
    275 		hi = _mm_add_epi64(hi, mulround);
    276 		lo = _mm_srli_epi64(lo, 13);
    277 		hi = _mm_slli_epi64(hi, 32-13);
    278 		u = _mm_blend_epi16(lo, hi, 0xCC);
    279 
    280 		lo = r;
    281 		hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
    282 		lo = _mm_mul_epi32(lo, ru);
    283 		hi = _mm_mul_epi32(hi, ru);
    284 		lo = _mm_add_epi64(lo, mulround);
    285 		hi = _mm_add_epi64(hi, mulround);
    286 		lo = _mm_srli_epi64(lo, 13);
    287 		hi = _mm_slli_epi64(hi, 32-13);
    288 		u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
    289 
    290 		lo = g;
    291 		hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
    292 		lo = _mm_mul_epi32(lo, gu);
    293 		hi = _mm_mul_epi32(hi, gu);
    294 		lo = _mm_add_epi64(lo, mulround);
    295 		hi = _mm_add_epi64(hi, mulround);
    296 		lo = _mm_srli_epi64(lo, 13);
    297 		hi = _mm_slli_epi64(hi, 32-13);
    298 		u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
    299 		_mm_store_si128((__m128i *)&(c1[i]), u);
    300 
    301 		/*lo = r;
    302 		hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
    303 		lo = _mm_mul_epi32(lo, mulround);
    304 		hi = _mm_mul_epi32(hi, mulround);*/
    305 		lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0)));
    306 		hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1)));
    307 		lo = _mm_slli_epi64(lo, 12);
    308 		hi = _mm_slli_epi64(hi, 12);
    309 		lo = _mm_add_epi64(lo, mulround);
    310 		hi = _mm_add_epi64(hi, mulround);
    311 		lo = _mm_srli_epi64(lo, 13);
    312 		hi = _mm_slli_epi64(hi, 32-13);
    313 		v = _mm_blend_epi16(lo, hi, 0xCC);
    314 
    315 		lo = g;
    316 		hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
    317 		lo = _mm_mul_epi32(lo, gv);
    318 		hi = _mm_mul_epi32(hi, gv);
    319 		lo = _mm_add_epi64(lo, mulround);
    320 		hi = _mm_add_epi64(hi, mulround);
    321 		lo = _mm_srli_epi64(lo, 13);
    322 		hi = _mm_slli_epi64(hi, 32-13);
    323 		v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
    324 
    325 		lo = b;
    326 		hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
    327 		lo = _mm_mul_epi32(lo, bv);
    328 		hi = _mm_mul_epi32(hi, bv);
    329 		lo = _mm_add_epi64(lo, mulround);
    330 		hi = _mm_add_epi64(hi, mulround);
    331 		lo = _mm_srli_epi64(lo, 13);
    332 		hi = _mm_slli_epi64(hi, 32-13);
    333 		v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
    334 		_mm_store_si128((__m128i *)&(c2[i]), v);
    335 	}
    336 	for(; i < len; ++i) {
    337 		OPJ_INT32 r = c0[i];
    338 		OPJ_INT32 g = c1[i];
    339 		OPJ_INT32 b = c2[i];
    340 		OPJ_INT32 y =  opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934);
    341 		OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096);
    342 		OPJ_INT32 v =  opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666);
    343 		c0[i] = y;
    344 		c1[i] = u;
    345 		c2[i] = v;
    346 	}
    347 }
    348 #else
    349 void opj_mct_encode_real(
    350 		OPJ_INT32* restrict c0,
    351 		OPJ_INT32* restrict c1,
    352 		OPJ_INT32* restrict c2,
    353 		OPJ_UINT32 n)
    354 {
    355 	OPJ_UINT32 i;
    356 	for(i = 0; i < n; ++i) {
    357 		OPJ_INT32 r = c0[i];
    358 		OPJ_INT32 g = c1[i];
    359 		OPJ_INT32 b = c2[i];
    360 		OPJ_INT32 y =  opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934);
    361 		OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096);
    362 		OPJ_INT32 v =  opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666);
    363 		c0[i] = y;
    364 		c1[i] = u;
    365 		c2[i] = v;
    366 	}
    367 }
    368 #endif
    369 
    370 /* <summary> */
    371 /* Inverse irreversible MCT. */
    372 /* </summary> */
    373 void opj_mct_decode_real(
    374 		OPJ_FLOAT32* restrict c0,
    375 		OPJ_FLOAT32* restrict c1,
    376 		OPJ_FLOAT32* restrict c2,
    377 		OPJ_UINT32 n)
    378 {
    379 	OPJ_UINT32 i;
    380 #ifdef __SSE__
    381 	__m128 vrv, vgu, vgv, vbu;
    382 	vrv = _mm_set1_ps(1.402f);
    383 	vgu = _mm_set1_ps(0.34413f);
    384 	vgv = _mm_set1_ps(0.71414f);
    385 	vbu = _mm_set1_ps(1.772f);
    386 	for (i = 0; i < (n >> 3); ++i) {
    387 		__m128 vy, vu, vv;
    388 		__m128 vr, vg, vb;
    389 
    390 		vy = _mm_load_ps(c0);
    391 		vu = _mm_load_ps(c1);
    392 		vv = _mm_load_ps(c2);
    393 		vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
    394 		vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
    395 		vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
    396 		_mm_store_ps(c0, vr);
    397 		_mm_store_ps(c1, vg);
    398 		_mm_store_ps(c2, vb);
    399 		c0 += 4;
    400 		c1 += 4;
    401 		c2 += 4;
    402 
    403 		vy = _mm_load_ps(c0);
    404 		vu = _mm_load_ps(c1);
    405 		vv = _mm_load_ps(c2);
    406 		vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
    407 		vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
    408 		vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
    409 		_mm_store_ps(c0, vr);
    410 		_mm_store_ps(c1, vg);
    411 		_mm_store_ps(c2, vb);
    412 		c0 += 4;
    413 		c1 += 4;
    414 		c2 += 4;
    415 	}
    416 	n &= 7;
    417 #endif
    418 	for(i = 0; i < n; ++i) {
    419 		OPJ_FLOAT32 y = c0[i];
    420 		OPJ_FLOAT32 u = c1[i];
    421 		OPJ_FLOAT32 v = c2[i];
    422 		OPJ_FLOAT32 r = y + (v * 1.402f);
    423 		OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f));
    424 		OPJ_FLOAT32 b = y + (u * 1.772f);
    425 		c0[i] = r;
    426 		c1[i] = g;
    427 		c2[i] = b;
    428 	}
    429 }
    430 
    431 /* <summary> */
    432 /* Get norm of basis function of irreversible MCT. */
    433 /* </summary> */
    434 OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno) {
    435 	return opj_mct_norms_real[compno];
    436 }
    437 
    438 
    439 OPJ_BOOL opj_mct_encode_custom(
    440 					   OPJ_BYTE * pCodingdata,
    441 					   OPJ_UINT32 n,
    442 					   OPJ_BYTE ** pData,
    443 					   OPJ_UINT32 pNbComp,
    444 					   OPJ_UINT32 isSigned)
    445 {
    446 	OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata;
    447 	OPJ_UINT32 i;
    448 	OPJ_UINT32 j;
    449 	OPJ_UINT32 k;
    450 	OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp;
    451 	OPJ_INT32 * lCurrentData = 00;
    452 	OPJ_INT32 * lCurrentMatrix = 00;
    453 	OPJ_INT32 ** lData = (OPJ_INT32 **) pData;
    454 	OPJ_UINT32 lMultiplicator = 1 << 13;
    455 	OPJ_INT32 * lMctPtr;
    456 
    457     OPJ_ARG_NOT_USED(isSigned);
    458 
    459 	lCurrentData = (OPJ_INT32 *) opj_malloc((pNbComp + lNbMatCoeff) * sizeof(OPJ_INT32));
    460 	if (! lCurrentData) {
    461 		return OPJ_FALSE;
    462 	}
    463 
    464 	lCurrentMatrix = lCurrentData + pNbComp;
    465 
    466 	for (i =0;i<lNbMatCoeff;++i) {
    467 		lCurrentMatrix[i] = (OPJ_INT32) (*(lMct++) * (OPJ_FLOAT32)lMultiplicator);
    468 	}
    469 
    470 	for (i = 0; i < n; ++i)  {
    471 		lMctPtr = lCurrentMatrix;
    472 		for (j=0;j<pNbComp;++j) {
    473 			lCurrentData[j] = (*(lData[j]));
    474 		}
    475 
    476 		for (j=0;j<pNbComp;++j) {
    477 			*(lData[j]) = 0;
    478 			for (k=0;k<pNbComp;++k) {
    479 				*(lData[j]) += opj_int_fix_mul(*lMctPtr, lCurrentData[k]);
    480 				++lMctPtr;
    481 			}
    482 
    483 			++lData[j];
    484 		}
    485 	}
    486 
    487 	opj_free(lCurrentData);
    488 
    489 	return OPJ_TRUE;
    490 }
    491 
    492 OPJ_BOOL opj_mct_decode_custom(
    493 					   OPJ_BYTE * pDecodingData,
    494 					   OPJ_UINT32 n,
    495 					   OPJ_BYTE ** pData,
    496 					   OPJ_UINT32 pNbComp,
    497 					   OPJ_UINT32 isSigned)
    498 {
    499 	OPJ_FLOAT32 * lMct;
    500 	OPJ_UINT32 i;
    501 	OPJ_UINT32 j;
    502 	OPJ_UINT32 k;
    503 
    504 	OPJ_FLOAT32 * lCurrentData = 00;
    505 	OPJ_FLOAT32 * lCurrentResult = 00;
    506 	OPJ_FLOAT32 ** lData = (OPJ_FLOAT32 **) pData;
    507 
    508     OPJ_ARG_NOT_USED(isSigned);
    509 
    510 	lCurrentData = (OPJ_FLOAT32 *) opj_malloc (2 * pNbComp * sizeof(OPJ_FLOAT32));
    511 	if (! lCurrentData) {
    512 		return OPJ_FALSE;
    513 	}
    514 	lCurrentResult = lCurrentData + pNbComp;
    515 
    516 	for (i = 0; i < n; ++i) {
    517 		lMct = (OPJ_FLOAT32 *) pDecodingData;
    518 		for (j=0;j<pNbComp;++j) {
    519 			lCurrentData[j] = (OPJ_FLOAT32) (*(lData[j]));
    520 		}
    521 		for (j=0;j<pNbComp;++j) {
    522 			lCurrentResult[j] = 0;
    523 			for	(k=0;k<pNbComp;++k)	{
    524 				lCurrentResult[j] += *(lMct++) * lCurrentData[k];
    525 			}
    526 			*(lData[j]++) = (OPJ_FLOAT32) (lCurrentResult[j]);
    527 		}
    528 	}
    529 	opj_free(lCurrentData);
    530 	return OPJ_TRUE;
    531 }
    532 
    533 void opj_calculate_norms(	OPJ_FLOAT64 * pNorms,
    534 							OPJ_UINT32 pNbComps,
    535 							OPJ_FLOAT32 * pMatrix)
    536 {
    537 	OPJ_UINT32 i,j,lIndex;
    538 	OPJ_FLOAT32 lCurrentValue;
    539 	OPJ_FLOAT64 * lNorms = (OPJ_FLOAT64 *) pNorms;
    540 	OPJ_FLOAT32 * lMatrix = (OPJ_FLOAT32 *) pMatrix;
    541 
    542 	for	(i=0;i<pNbComps;++i) {
    543 		lNorms[i] = 0;
    544 		lIndex = i;
    545 
    546 		for	(j=0;j<pNbComps;++j) {
    547 			lCurrentValue = lMatrix[lIndex];
    548 			lIndex += pNbComps;
    549 			lNorms[i] += lCurrentValue * lCurrentValue;
    550 		}
    551 		lNorms[i] = sqrt(lNorms[i]);
    552 	}
    553 }
    554