Home | History | Annotate | Download | only in libopenjpeg20
      1 /*
      2  * The copyright in this software is being made available under the 2-clauses
      3  * BSD License, included below. This software may be subject to other third
      4  * party and contributor rights, including patent rights, and no such rights
      5  * are granted under this license.
      6  *
      7  * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
      8  * Copyright (c) 2002-2014, Professor Benoit Macq
      9  * Copyright (c) 2001-2003, David Janssens
     10  * Copyright (c) 2002-2003, Yannick Verschueren
     11  * Copyright (c) 2003-2007, Francois-Olivier Devaux
     12  * Copyright (c) 2003-2014, Antonin Descampe
     13  * Copyright (c) 2005, Herve Drolon, FreeImage Team
     14  * Copyright (c) 2008, 2011-2012, Centre National d'Etudes Spatiales (CNES), FR
     15  * Copyright (c) 2012, CS Systemes d'Information, France
     16  * All rights reserved.
     17  *
     18  * Redistribution and use in source and binary forms, with or without
     19  * modification, are permitted provided that the following conditions
     20  * are met:
     21  * 1. Redistributions of source code must retain the above copyright
     22  *    notice, this list of conditions and the following disclaimer.
     23  * 2. Redistributions in binary form must reproduce the above copyright
     24  *    notice, this list of conditions and the following disclaimer in the
     25  *    documentation and/or other materials provided with the distribution.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
     28  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     31  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37  * POSSIBILITY OF SUCH DAMAGE.
     38  */
     39 
     40 #if defined(__SSE__) && !defined(_M_IX86) && !defined(__i386)
     41 #define USE_SSE
     42 #include <xmmintrin.h>
     43 #endif
     44 #if defined(__SSE2__) && !defined(_M_IX86) && !defined(__i386)
     45 #define USE_SSE2
     46 #include <emmintrin.h>
     47 #endif
     48 #if defined(__SSE4_1__) && !defined(_M_IX86) && !defined(__i386)
     49 #define USE_SSE4
     50 #include <smmintrin.h>
     51 #endif
     52 
     53 #include "opj_includes.h"
     54 
     55 /* <summary> */
     56 /* This table contains the norms of the basis function of the reversible MCT. */
     57 /* </summary> */
     58 static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 };
     59 
     60 /* <summary> */
     61 /* This table contains the norms of the basis function of the irreversible MCT. */
     62 /* </summary> */
     63 static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 };
     64 
     65 const OPJ_FLOAT64 * opj_mct_get_mct_norms()
     66 {
     67     return opj_mct_norms;
     68 }
     69 
     70 const OPJ_FLOAT64 * opj_mct_get_mct_norms_real()
     71 {
     72     return opj_mct_norms_real;
     73 }
     74 
     75 /* <summary> */
     76 /* Forward reversible MCT. */
     77 /* </summary> */
     78 #ifdef USE_SSE2
     79 void opj_mct_encode(
     80     OPJ_INT32* OPJ_RESTRICT c0,
     81     OPJ_INT32* OPJ_RESTRICT c1,
     82     OPJ_INT32* OPJ_RESTRICT c2,
     83     OPJ_SIZE_T n)
     84 {
     85     OPJ_SIZE_T i;
     86     const OPJ_SIZE_T len = n;
     87     /* buffer are aligned on 16 bytes */
     88     assert(((size_t)c0 & 0xf) == 0);
     89     assert(((size_t)c1 & 0xf) == 0);
     90     assert(((size_t)c2 & 0xf) == 0);
     91 
     92     for (i = 0; i < (len & ~3U); i += 4) {
     93         __m128i y, u, v;
     94         __m128i r = _mm_load_si128((const __m128i *) & (c0[i]));
     95         __m128i g = _mm_load_si128((const __m128i *) & (c1[i]));
     96         __m128i b = _mm_load_si128((const __m128i *) & (c2[i]));
     97         y = _mm_add_epi32(g, g);
     98         y = _mm_add_epi32(y, b);
     99         y = _mm_add_epi32(y, r);
    100         y = _mm_srai_epi32(y, 2);
    101         u = _mm_sub_epi32(b, g);
    102         v = _mm_sub_epi32(r, g);
    103         _mm_store_si128((__m128i *) & (c0[i]), y);
    104         _mm_store_si128((__m128i *) & (c1[i]), u);
    105         _mm_store_si128((__m128i *) & (c2[i]), v);
    106     }
    107 
    108     for (; i < len; ++i) {
    109         OPJ_INT32 r = c0[i];
    110         OPJ_INT32 g = c1[i];
    111         OPJ_INT32 b = c2[i];
    112         OPJ_INT32 y = (r + (g * 2) + b) >> 2;
    113         OPJ_INT32 u = b - g;
    114         OPJ_INT32 v = r - g;
    115         c0[i] = y;
    116         c1[i] = u;
    117         c2[i] = v;
    118     }
    119 }
    120 #else
    121 void opj_mct_encode(
    122     OPJ_INT32* OPJ_RESTRICT c0,
    123     OPJ_INT32* OPJ_RESTRICT c1,
    124     OPJ_INT32* OPJ_RESTRICT c2,
    125     OPJ_SIZE_T n)
    126 {
    127     OPJ_SIZE_T i;
    128     const OPJ_SIZE_T len = n;
    129 
    130     for (i = 0; i < len; ++i) {
    131         OPJ_INT32 r = c0[i];
    132         OPJ_INT32 g = c1[i];
    133         OPJ_INT32 b = c2[i];
    134         OPJ_INT32 y = (r + (g * 2) + b) >> 2;
    135         OPJ_INT32 u = b - g;
    136         OPJ_INT32 v = r - g;
    137         c0[i] = y;
    138         c1[i] = u;
    139         c2[i] = v;
    140     }
    141 }
    142 #endif
    143 
    144 /* <summary> */
    145 /* Inverse reversible MCT. */
    146 /* </summary> */
    147 #ifdef USE_SSE2
    148 void opj_mct_decode(
    149     OPJ_INT32* OPJ_RESTRICT c0,
    150     OPJ_INT32* OPJ_RESTRICT c1,
    151     OPJ_INT32* OPJ_RESTRICT c2,
    152     OPJ_SIZE_T n)
    153 {
    154     OPJ_SIZE_T i;
    155     const OPJ_SIZE_T len = n;
    156 
    157     for (i = 0; i < (len & ~3U); i += 4) {
    158         __m128i r, g, b;
    159         __m128i y = _mm_load_si128((const __m128i *) & (c0[i]));
    160         __m128i u = _mm_load_si128((const __m128i *) & (c1[i]));
    161         __m128i v = _mm_load_si128((const __m128i *) & (c2[i]));
    162         g = y;
    163         g = _mm_sub_epi32(g, _mm_srai_epi32(_mm_add_epi32(u, v), 2));
    164         r = _mm_add_epi32(v, g);
    165         b = _mm_add_epi32(u, g);
    166         _mm_store_si128((__m128i *) & (c0[i]), r);
    167         _mm_store_si128((__m128i *) & (c1[i]), g);
    168         _mm_store_si128((__m128i *) & (c2[i]), b);
    169     }
    170     for (; i < len; ++i) {
    171         OPJ_INT32 y = c0[i];
    172         OPJ_INT32 u = c1[i];
    173         OPJ_INT32 v = c2[i];
    174         OPJ_INT32 g = y - ((u + v) >> 2);
    175         OPJ_INT32 r = v + g;
    176         OPJ_INT32 b = u + g;
    177         c0[i] = r;
    178         c1[i] = g;
    179         c2[i] = b;
    180     }
    181 }
    182 #else
    183 void opj_mct_decode(
    184     OPJ_INT32* OPJ_RESTRICT c0,
    185     OPJ_INT32* OPJ_RESTRICT c1,
    186     OPJ_INT32* OPJ_RESTRICT c2,
    187     OPJ_SIZE_T n)
    188 {
    189     OPJ_UINT32 i;
    190     for (i = 0; i < n; ++i) {
    191         OPJ_INT32 y = c0[i];
    192         OPJ_INT32 u = c1[i];
    193         OPJ_INT32 v = c2[i];
    194         OPJ_INT32 g = y - ((u + v) >> 2);
    195         OPJ_INT32 r = v + g;
    196         OPJ_INT32 b = u + g;
    197         c0[i] = r;
    198         c1[i] = g;
    199         c2[i] = b;
    200     }
    201 }
    202 #endif
    203 
    204 /* <summary> */
    205 /* Get norm of basis function of reversible MCT. */
    206 /* </summary> */
    207 OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno)
    208 {
    209     return opj_mct_norms[compno];
    210 }
    211 
    212 /* <summary> */
    213 /* Forward irreversible MCT. */
    214 /* </summary> */
    215 #ifdef USE_SSE4
    216 void opj_mct_encode_real(
    217     OPJ_INT32* OPJ_RESTRICT c0,
    218     OPJ_INT32* OPJ_RESTRICT c1,
    219     OPJ_INT32* OPJ_RESTRICT c2,
    220     OPJ_SIZE_T n)
    221 {
    222     OPJ_SIZE_T i;
    223     const OPJ_SIZE_T len = n;
    224 
    225     const __m128i ry = _mm_set1_epi32(2449);
    226     const __m128i gy = _mm_set1_epi32(4809);
    227     const __m128i by = _mm_set1_epi32(934);
    228     const __m128i ru = _mm_set1_epi32(1382);
    229     const __m128i gu = _mm_set1_epi32(2714);
    230     /* const __m128i bu = _mm_set1_epi32(4096); */
    231     /* const __m128i rv = _mm_set1_epi32(4096); */
    232     const __m128i gv = _mm_set1_epi32(3430);
    233     const __m128i bv = _mm_set1_epi32(666);
    234     const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096),
    235                              _MM_SHUFFLE(1, 0, 1, 0));
    236 
    237     for (i = 0; i < (len & ~3U); i += 4) {
    238         __m128i lo, hi;
    239         __m128i y, u, v;
    240         __m128i r = _mm_load_si128((const __m128i *) & (c0[i]));
    241         __m128i g = _mm_load_si128((const __m128i *) & (c1[i]));
    242         __m128i b = _mm_load_si128((const __m128i *) & (c2[i]));
    243 
    244         lo = r;
    245         hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
    246         lo = _mm_mul_epi32(lo, ry);
    247         hi = _mm_mul_epi32(hi, ry);
    248         lo = _mm_add_epi64(lo, mulround);
    249         hi = _mm_add_epi64(hi, mulround);
    250         lo = _mm_srli_epi64(lo, 13);
    251         hi = _mm_slli_epi64(hi, 32 - 13);
    252         y = _mm_blend_epi16(lo, hi, 0xCC);
    253 
    254         lo = g;
    255         hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
    256         lo = _mm_mul_epi32(lo, gy);
    257         hi = _mm_mul_epi32(hi, gy);
    258         lo = _mm_add_epi64(lo, mulround);
    259         hi = _mm_add_epi64(hi, mulround);
    260         lo = _mm_srli_epi64(lo, 13);
    261         hi = _mm_slli_epi64(hi, 32 - 13);
    262         y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
    263 
    264         lo = b;
    265         hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
    266         lo = _mm_mul_epi32(lo, by);
    267         hi = _mm_mul_epi32(hi, by);
    268         lo = _mm_add_epi64(lo, mulround);
    269         hi = _mm_add_epi64(hi, mulround);
    270         lo = _mm_srli_epi64(lo, 13);
    271         hi = _mm_slli_epi64(hi, 32 - 13);
    272         y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
    273         _mm_store_si128((__m128i *) & (c0[i]), y);
    274 
    275         /*lo = b;
    276         hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
    277         lo = _mm_mul_epi32(lo, mulround);
    278         hi = _mm_mul_epi32(hi, mulround);*/
    279         lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0)));
    280         hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1)));
    281         lo = _mm_slli_epi64(lo, 12);
    282         hi = _mm_slli_epi64(hi, 12);
    283         lo = _mm_add_epi64(lo, mulround);
    284         hi = _mm_add_epi64(hi, mulround);
    285         lo = _mm_srli_epi64(lo, 13);
    286         hi = _mm_slli_epi64(hi, 32 - 13);
    287         u = _mm_blend_epi16(lo, hi, 0xCC);
    288 
    289         lo = r;
    290         hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
    291         lo = _mm_mul_epi32(lo, ru);
    292         hi = _mm_mul_epi32(hi, ru);
    293         lo = _mm_add_epi64(lo, mulround);
    294         hi = _mm_add_epi64(hi, mulround);
    295         lo = _mm_srli_epi64(lo, 13);
    296         hi = _mm_slli_epi64(hi, 32 - 13);
    297         u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
    298 
    299         lo = g;
    300         hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
    301         lo = _mm_mul_epi32(lo, gu);
    302         hi = _mm_mul_epi32(hi, gu);
    303         lo = _mm_add_epi64(lo, mulround);
    304         hi = _mm_add_epi64(hi, mulround);
    305         lo = _mm_srli_epi64(lo, 13);
    306         hi = _mm_slli_epi64(hi, 32 - 13);
    307         u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
    308         _mm_store_si128((__m128i *) & (c1[i]), u);
    309 
    310         /*lo = r;
    311         hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
    312         lo = _mm_mul_epi32(lo, mulround);
    313         hi = _mm_mul_epi32(hi, mulround);*/
    314         lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0)));
    315         hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1)));
    316         lo = _mm_slli_epi64(lo, 12);
    317         hi = _mm_slli_epi64(hi, 12);
    318         lo = _mm_add_epi64(lo, mulround);
    319         hi = _mm_add_epi64(hi, mulround);
    320         lo = _mm_srli_epi64(lo, 13);
    321         hi = _mm_slli_epi64(hi, 32 - 13);
    322         v = _mm_blend_epi16(lo, hi, 0xCC);
    323 
    324         lo = g;
    325         hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
    326         lo = _mm_mul_epi32(lo, gv);
    327         hi = _mm_mul_epi32(hi, gv);
    328         lo = _mm_add_epi64(lo, mulround);
    329         hi = _mm_add_epi64(hi, mulround);
    330         lo = _mm_srli_epi64(lo, 13);
    331         hi = _mm_slli_epi64(hi, 32 - 13);
    332         v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
    333 
    334         lo = b;
    335         hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
    336         lo = _mm_mul_epi32(lo, bv);
    337         hi = _mm_mul_epi32(hi, bv);
    338         lo = _mm_add_epi64(lo, mulround);
    339         hi = _mm_add_epi64(hi, mulround);
    340         lo = _mm_srli_epi64(lo, 13);
    341         hi = _mm_slli_epi64(hi, 32 - 13);
    342         v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
    343         _mm_store_si128((__m128i *) & (c2[i]), v);
    344     }
    345     for (; i < len; ++i) {
    346         OPJ_INT32 r = c0[i];
    347         OPJ_INT32 g = c1[i];
    348         OPJ_INT32 b = c2[i];
    349         OPJ_INT32 y =  opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g,
    350                        4809) + opj_int_fix_mul(b, 934);
    351         OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g,
    352                       2714) + opj_int_fix_mul(b, 4096);
    353         OPJ_INT32 v =  opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g,
    354                        3430) - opj_int_fix_mul(b, 666);
    355         c0[i] = y;
    356         c1[i] = u;
    357         c2[i] = v;
    358     }
    359 }
    360 #else
    361 void opj_mct_encode_real(
    362     OPJ_INT32* OPJ_RESTRICT c0,
    363     OPJ_INT32* OPJ_RESTRICT c1,
    364     OPJ_INT32* OPJ_RESTRICT c2,
    365     OPJ_SIZE_T n)
    366 {
    367     OPJ_UINT32 i;
    368     for (i = 0; i < n; ++i) {
    369         OPJ_INT32 r = c0[i];
    370         OPJ_INT32 g = c1[i];
    371         OPJ_INT32 b = c2[i];
    372         OPJ_INT32 y =  opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g,
    373                        4809) + opj_int_fix_mul(b, 934);
    374         OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g,
    375                       2714) + opj_int_fix_mul(b, 4096);
    376         OPJ_INT32 v =  opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g,
    377                        3430) - opj_int_fix_mul(b, 666);
    378         c0[i] = y;
    379         c1[i] = u;
    380         c2[i] = v;
    381     }
    382 }
    383 #endif
    384 
    385 /* <summary> */
    386 /* Inverse irreversible MCT. */
    387 /* </summary> */
    388 void opj_mct_decode_real(
    389     OPJ_FLOAT32* OPJ_RESTRICT c0,
    390     OPJ_FLOAT32* OPJ_RESTRICT c1,
    391     OPJ_FLOAT32* OPJ_RESTRICT c2,
    392     OPJ_SIZE_T n)
    393 {
    394     OPJ_UINT32 i;
    395 #ifdef USE_SSE
    396     __m128 vrv, vgu, vgv, vbu;
    397     vrv = _mm_set1_ps(1.402f);
    398     vgu = _mm_set1_ps(0.34413f);
    399     vgv = _mm_set1_ps(0.71414f);
    400     vbu = _mm_set1_ps(1.772f);
    401     for (i = 0; i < (n >> 3); ++i) {
    402         __m128 vy, vu, vv;
    403         __m128 vr, vg, vb;
    404 
    405         vy = _mm_load_ps(c0);
    406         vu = _mm_load_ps(c1);
    407         vv = _mm_load_ps(c2);
    408         vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
    409         vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
    410         vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
    411         _mm_store_ps(c0, vr);
    412         _mm_store_ps(c1, vg);
    413         _mm_store_ps(c2, vb);
    414         c0 += 4;
    415         c1 += 4;
    416         c2 += 4;
    417 
    418         vy = _mm_load_ps(c0);
    419         vu = _mm_load_ps(c1);
    420         vv = _mm_load_ps(c2);
    421         vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
    422         vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
    423         vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
    424         _mm_store_ps(c0, vr);
    425         _mm_store_ps(c1, vg);
    426         _mm_store_ps(c2, vb);
    427         c0 += 4;
    428         c1 += 4;
    429         c2 += 4;
    430     }
    431     n &= 7;
    432 #endif
    433     for (i = 0; i < n; ++i) {
    434         OPJ_FLOAT32 y = c0[i];
    435         OPJ_FLOAT32 u = c1[i];
    436         OPJ_FLOAT32 v = c2[i];
    437         OPJ_FLOAT32 r = y + (v * 1.402f);
    438         OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f));
    439         OPJ_FLOAT32 b = y + (u * 1.772f);
    440         c0[i] = r;
    441         c1[i] = g;
    442         c2[i] = b;
    443     }
    444 }
    445 
    446 /* <summary> */
    447 /* Get norm of basis function of irreversible MCT. */
    448 /* </summary> */
    449 OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno)
    450 {
    451     return opj_mct_norms_real[compno];
    452 }
    453 
    454 
    455 OPJ_BOOL opj_mct_encode_custom(
    456     OPJ_BYTE * pCodingdata,
    457     OPJ_SIZE_T n,
    458     OPJ_BYTE ** pData,
    459     OPJ_UINT32 pNbComp,
    460     OPJ_UINT32 isSigned)
    461 {
    462     OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata;
    463     OPJ_SIZE_T i;
    464     OPJ_UINT32 j;
    465     OPJ_UINT32 k;
    466     OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp;
    467     OPJ_INT32 * lCurrentData = 00;
    468     OPJ_INT32 * lCurrentMatrix = 00;
    469     OPJ_INT32 ** lData = (OPJ_INT32 **) pData;
    470     OPJ_UINT32 lMultiplicator = 1 << 13;
    471     OPJ_INT32 * lMctPtr;
    472 
    473     OPJ_ARG_NOT_USED(isSigned);
    474 
    475     lCurrentData = (OPJ_INT32 *) opj_malloc((pNbComp + lNbMatCoeff) * sizeof(
    476             OPJ_INT32));
    477     if (! lCurrentData) {
    478         return OPJ_FALSE;
    479     }
    480 
    481     lCurrentMatrix = lCurrentData + pNbComp;
    482 
    483     for (i = 0; i < lNbMatCoeff; ++i) {
    484         lCurrentMatrix[i] = (OPJ_INT32)(*(lMct++) * (OPJ_FLOAT32)lMultiplicator);
    485     }
    486 
    487     for (i = 0; i < n; ++i)  {
    488         lMctPtr = lCurrentMatrix;
    489         for (j = 0; j < pNbComp; ++j) {
    490             lCurrentData[j] = (*(lData[j]));
    491         }
    492 
    493         for (j = 0; j < pNbComp; ++j) {
    494             *(lData[j]) = 0;
    495             for (k = 0; k < pNbComp; ++k) {
    496                 *(lData[j]) += opj_int_fix_mul(*lMctPtr, lCurrentData[k]);
    497                 ++lMctPtr;
    498             }
    499 
    500             ++lData[j];
    501         }
    502     }
    503 
    504     opj_free(lCurrentData);
    505 
    506     return OPJ_TRUE;
    507 }
    508 
    509 OPJ_BOOL opj_mct_decode_custom(
    510     OPJ_BYTE * pDecodingData,
    511     OPJ_SIZE_T n,
    512     OPJ_BYTE ** pData,
    513     OPJ_UINT32 pNbComp,
    514     OPJ_UINT32 isSigned)
    515 {
    516     OPJ_FLOAT32 * lMct;
    517     OPJ_SIZE_T i;
    518     OPJ_UINT32 j;
    519     OPJ_UINT32 k;
    520 
    521     OPJ_FLOAT32 * lCurrentData = 00;
    522     OPJ_FLOAT32 * lCurrentResult = 00;
    523     OPJ_FLOAT32 ** lData = (OPJ_FLOAT32 **) pData;
    524 
    525     OPJ_ARG_NOT_USED(isSigned);
    526 
    527     lCurrentData = (OPJ_FLOAT32 *) opj_malloc(2 * pNbComp * sizeof(OPJ_FLOAT32));
    528     if (! lCurrentData) {
    529         return OPJ_FALSE;
    530     }
    531     lCurrentResult = lCurrentData + pNbComp;
    532 
    533     for (i = 0; i < n; ++i) {
    534         lMct = (OPJ_FLOAT32 *) pDecodingData;
    535         for (j = 0; j < pNbComp; ++j) {
    536             lCurrentData[j] = (OPJ_FLOAT32)(*(lData[j]));
    537         }
    538         for (j = 0; j < pNbComp; ++j) {
    539             lCurrentResult[j] = 0;
    540             for (k = 0; k < pNbComp; ++k) {
    541                 lCurrentResult[j] += *(lMct++) * lCurrentData[k];
    542             }
    543             *(lData[j]++) = (OPJ_FLOAT32)(lCurrentResult[j]);
    544         }
    545     }
    546     opj_free(lCurrentData);
    547     return OPJ_TRUE;
    548 }
    549 
    550 void opj_calculate_norms(OPJ_FLOAT64 * pNorms,
    551                          OPJ_UINT32 pNbComps,
    552                          OPJ_FLOAT32 * pMatrix)
    553 {
    554     OPJ_UINT32 i, j, lIndex;
    555     OPJ_FLOAT32 lCurrentValue;
    556     OPJ_FLOAT64 * lNorms = (OPJ_FLOAT64 *) pNorms;
    557     OPJ_FLOAT32 * lMatrix = (OPJ_FLOAT32 *) pMatrix;
    558 
    559     for (i = 0; i < pNbComps; ++i) {
    560         lNorms[i] = 0;
    561         lIndex = i;
    562 
    563         for (j = 0; j < pNbComps; ++j) {
    564             lCurrentValue = lMatrix[lIndex];
    565             lIndex += pNbComps;
    566             lNorms[i] += lCurrentValue * lCurrentValue;
    567         }
    568         lNorms[i] = sqrt(lNorms[i]);
    569     }
    570 }
    571