1 /* 2 * The copyright in this software is being made available under the 2-clauses 3 * BSD License, included below. This software may be subject to other third 4 * party and contributor rights, including patent rights, and no such rights 5 * are granted under this license. 6 * 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium 8 * Copyright (c) 2002-2014, Professor Benoit Macq 9 * Copyright (c) 2001-2003, David Janssens 10 * Copyright (c) 2002-2003, Yannick Verschueren 11 * Copyright (c) 2003-2007, Francois-Olivier Devaux 12 * Copyright (c) 2003-2014, Antonin Descampe 13 * Copyright (c) 2005, Herve Drolon, FreeImage Team 14 * Copyright (c) 2008, 2011-2012, Centre National d'Etudes Spatiales (CNES), FR 15 * Copyright (c) 2012, CS Systemes d'Information, France 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions 20 * are met: 21 * 1. Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * 2. Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' 28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 #if defined(__SSE__) && !defined(_M_IX86) && !defined(__i386) 41 #define USE_SSE 42 #include <xmmintrin.h> 43 #endif 44 #if defined(__SSE2__) && !defined(_M_IX86) && !defined(__i386) 45 #define USE_SSE2 46 #include <emmintrin.h> 47 #endif 48 #if defined(__SSE4_1__) && !defined(_M_IX86) && !defined(__i386) 49 #define USE_SSE4 50 #include <smmintrin.h> 51 #endif 52 53 #include "opj_includes.h" 54 55 /* <summary> */ 56 /* This table contains the norms of the basis function of the reversible MCT. */ 57 /* </summary> */ 58 static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 }; 59 60 /* <summary> */ 61 /* This table contains the norms of the basis function of the irreversible MCT. */ 62 /* </summary> */ 63 static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 }; 64 65 const OPJ_FLOAT64 * opj_mct_get_mct_norms() 66 { 67 return opj_mct_norms; 68 } 69 70 const OPJ_FLOAT64 * opj_mct_get_mct_norms_real() 71 { 72 return opj_mct_norms_real; 73 } 74 75 /* <summary> */ 76 /* Forward reversible MCT. */ 77 /* </summary> */ 78 #ifdef USE_SSE2 79 void opj_mct_encode( 80 OPJ_INT32* OPJ_RESTRICT c0, 81 OPJ_INT32* OPJ_RESTRICT c1, 82 OPJ_INT32* OPJ_RESTRICT c2, 83 OPJ_SIZE_T n) 84 { 85 OPJ_SIZE_T i; 86 const OPJ_SIZE_T len = n; 87 /* buffer are aligned on 16 bytes */ 88 assert(((size_t)c0 & 0xf) == 0); 89 assert(((size_t)c1 & 0xf) == 0); 90 assert(((size_t)c2 & 0xf) == 0); 91 92 for (i = 0; i < (len & ~3U); i += 4) { 93 __m128i y, u, v; 94 __m128i r = _mm_load_si128((const __m128i *) & (c0[i])); 95 __m128i g = _mm_load_si128((const __m128i *) & (c1[i])); 96 __m128i b = _mm_load_si128((const __m128i *) & (c2[i])); 97 y = _mm_add_epi32(g, g); 98 y = _mm_add_epi32(y, b); 99 y = _mm_add_epi32(y, r); 100 y = _mm_srai_epi32(y, 2); 101 u = _mm_sub_epi32(b, g); 102 v = _mm_sub_epi32(r, g); 103 _mm_store_si128((__m128i *) & (c0[i]), y); 104 _mm_store_si128((__m128i *) & (c1[i]), u); 105 _mm_store_si128((__m128i *) & (c2[i]), v); 106 } 107 108 for (; i < len; ++i) { 109 OPJ_INT32 r = c0[i]; 110 OPJ_INT32 g = c1[i]; 111 OPJ_INT32 b = c2[i]; 112 OPJ_INT32 y = (r + (g * 2) + b) >> 2; 113 OPJ_INT32 u = b - g; 114 OPJ_INT32 v = r - g; 115 c0[i] = y; 116 c1[i] = u; 117 c2[i] = v; 118 } 119 } 120 #else 121 void opj_mct_encode( 122 OPJ_INT32* OPJ_RESTRICT c0, 123 OPJ_INT32* OPJ_RESTRICT c1, 124 OPJ_INT32* OPJ_RESTRICT c2, 125 OPJ_SIZE_T n) 126 { 127 OPJ_SIZE_T i; 128 const OPJ_SIZE_T len = n; 129 130 for (i = 0; i < len; ++i) { 131 OPJ_INT32 r = c0[i]; 132 OPJ_INT32 g = c1[i]; 133 OPJ_INT32 b = c2[i]; 134 OPJ_INT32 y = (r + (g * 2) + b) >> 2; 135 OPJ_INT32 u = b - g; 136 OPJ_INT32 v = r - g; 137 c0[i] = y; 138 c1[i] = u; 139 c2[i] = v; 140 } 141 } 142 #endif 143 144 /* <summary> */ 145 /* Inverse reversible MCT. */ 146 /* </summary> */ 147 #ifdef USE_SSE2 148 void opj_mct_decode( 149 OPJ_INT32* OPJ_RESTRICT c0, 150 OPJ_INT32* OPJ_RESTRICT c1, 151 OPJ_INT32* OPJ_RESTRICT c2, 152 OPJ_SIZE_T n) 153 { 154 OPJ_SIZE_T i; 155 const OPJ_SIZE_T len = n; 156 157 for (i = 0; i < (len & ~3U); i += 4) { 158 __m128i r, g, b; 159 __m128i y = _mm_load_si128((const __m128i *) & (c0[i])); 160 __m128i u = _mm_load_si128((const __m128i *) & (c1[i])); 161 __m128i v = _mm_load_si128((const __m128i *) & (c2[i])); 162 g = y; 163 g = _mm_sub_epi32(g, _mm_srai_epi32(_mm_add_epi32(u, v), 2)); 164 r = _mm_add_epi32(v, g); 165 b = _mm_add_epi32(u, g); 166 _mm_store_si128((__m128i *) & (c0[i]), r); 167 _mm_store_si128((__m128i *) & (c1[i]), g); 168 _mm_store_si128((__m128i *) & (c2[i]), b); 169 } 170 for (; i < len; ++i) { 171 OPJ_INT32 y = c0[i]; 172 OPJ_INT32 u = c1[i]; 173 OPJ_INT32 v = c2[i]; 174 OPJ_INT32 g = y - ((u + v) >> 2); 175 OPJ_INT32 r = v + g; 176 OPJ_INT32 b = u + g; 177 c0[i] = r; 178 c1[i] = g; 179 c2[i] = b; 180 } 181 } 182 #else 183 void opj_mct_decode( 184 OPJ_INT32* OPJ_RESTRICT c0, 185 OPJ_INT32* OPJ_RESTRICT c1, 186 OPJ_INT32* OPJ_RESTRICT c2, 187 OPJ_SIZE_T n) 188 { 189 OPJ_UINT32 i; 190 for (i = 0; i < n; ++i) { 191 OPJ_INT32 y = c0[i]; 192 OPJ_INT32 u = c1[i]; 193 OPJ_INT32 v = c2[i]; 194 OPJ_INT32 g = y - ((u + v) >> 2); 195 OPJ_INT32 r = v + g; 196 OPJ_INT32 b = u + g; 197 c0[i] = r; 198 c1[i] = g; 199 c2[i] = b; 200 } 201 } 202 #endif 203 204 /* <summary> */ 205 /* Get norm of basis function of reversible MCT. */ 206 /* </summary> */ 207 OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) 208 { 209 return opj_mct_norms[compno]; 210 } 211 212 /* <summary> */ 213 /* Forward irreversible MCT. */ 214 /* </summary> */ 215 #ifdef USE_SSE4 216 void opj_mct_encode_real( 217 OPJ_INT32* OPJ_RESTRICT c0, 218 OPJ_INT32* OPJ_RESTRICT c1, 219 OPJ_INT32* OPJ_RESTRICT c2, 220 OPJ_SIZE_T n) 221 { 222 OPJ_SIZE_T i; 223 const OPJ_SIZE_T len = n; 224 225 const __m128i ry = _mm_set1_epi32(2449); 226 const __m128i gy = _mm_set1_epi32(4809); 227 const __m128i by = _mm_set1_epi32(934); 228 const __m128i ru = _mm_set1_epi32(1382); 229 const __m128i gu = _mm_set1_epi32(2714); 230 /* const __m128i bu = _mm_set1_epi32(4096); */ 231 /* const __m128i rv = _mm_set1_epi32(4096); */ 232 const __m128i gv = _mm_set1_epi32(3430); 233 const __m128i bv = _mm_set1_epi32(666); 234 const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096), 235 _MM_SHUFFLE(1, 0, 1, 0)); 236 237 for (i = 0; i < (len & ~3U); i += 4) { 238 __m128i lo, hi; 239 __m128i y, u, v; 240 __m128i r = _mm_load_si128((const __m128i *) & (c0[i])); 241 __m128i g = _mm_load_si128((const __m128i *) & (c1[i])); 242 __m128i b = _mm_load_si128((const __m128i *) & (c2[i])); 243 244 lo = r; 245 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); 246 lo = _mm_mul_epi32(lo, ry); 247 hi = _mm_mul_epi32(hi, ry); 248 lo = _mm_add_epi64(lo, mulround); 249 hi = _mm_add_epi64(hi, mulround); 250 lo = _mm_srli_epi64(lo, 13); 251 hi = _mm_slli_epi64(hi, 32 - 13); 252 y = _mm_blend_epi16(lo, hi, 0xCC); 253 254 lo = g; 255 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); 256 lo = _mm_mul_epi32(lo, gy); 257 hi = _mm_mul_epi32(hi, gy); 258 lo = _mm_add_epi64(lo, mulround); 259 hi = _mm_add_epi64(hi, mulround); 260 lo = _mm_srli_epi64(lo, 13); 261 hi = _mm_slli_epi64(hi, 32 - 13); 262 y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC)); 263 264 lo = b; 265 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); 266 lo = _mm_mul_epi32(lo, by); 267 hi = _mm_mul_epi32(hi, by); 268 lo = _mm_add_epi64(lo, mulround); 269 hi = _mm_add_epi64(hi, mulround); 270 lo = _mm_srli_epi64(lo, 13); 271 hi = _mm_slli_epi64(hi, 32 - 13); 272 y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC)); 273 _mm_store_si128((__m128i *) & (c0[i]), y); 274 275 /*lo = b; 276 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); 277 lo = _mm_mul_epi32(lo, mulround); 278 hi = _mm_mul_epi32(hi, mulround);*/ 279 lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0))); 280 hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1))); 281 lo = _mm_slli_epi64(lo, 12); 282 hi = _mm_slli_epi64(hi, 12); 283 lo = _mm_add_epi64(lo, mulround); 284 hi = _mm_add_epi64(hi, mulround); 285 lo = _mm_srli_epi64(lo, 13); 286 hi = _mm_slli_epi64(hi, 32 - 13); 287 u = _mm_blend_epi16(lo, hi, 0xCC); 288 289 lo = r; 290 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); 291 lo = _mm_mul_epi32(lo, ru); 292 hi = _mm_mul_epi32(hi, ru); 293 lo = _mm_add_epi64(lo, mulround); 294 hi = _mm_add_epi64(hi, mulround); 295 lo = _mm_srli_epi64(lo, 13); 296 hi = _mm_slli_epi64(hi, 32 - 13); 297 u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC)); 298 299 lo = g; 300 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); 301 lo = _mm_mul_epi32(lo, gu); 302 hi = _mm_mul_epi32(hi, gu); 303 lo = _mm_add_epi64(lo, mulround); 304 hi = _mm_add_epi64(hi, mulround); 305 lo = _mm_srli_epi64(lo, 13); 306 hi = _mm_slli_epi64(hi, 32 - 13); 307 u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC)); 308 _mm_store_si128((__m128i *) & (c1[i]), u); 309 310 /*lo = r; 311 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); 312 lo = _mm_mul_epi32(lo, mulround); 313 hi = _mm_mul_epi32(hi, mulround);*/ 314 lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0))); 315 hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1))); 316 lo = _mm_slli_epi64(lo, 12); 317 hi = _mm_slli_epi64(hi, 12); 318 lo = _mm_add_epi64(lo, mulround); 319 hi = _mm_add_epi64(hi, mulround); 320 lo = _mm_srli_epi64(lo, 13); 321 hi = _mm_slli_epi64(hi, 32 - 13); 322 v = _mm_blend_epi16(lo, hi, 0xCC); 323 324 lo = g; 325 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); 326 lo = _mm_mul_epi32(lo, gv); 327 hi = _mm_mul_epi32(hi, gv); 328 lo = _mm_add_epi64(lo, mulround); 329 hi = _mm_add_epi64(hi, mulround); 330 lo = _mm_srli_epi64(lo, 13); 331 hi = _mm_slli_epi64(hi, 32 - 13); 332 v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC)); 333 334 lo = b; 335 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); 336 lo = _mm_mul_epi32(lo, bv); 337 hi = _mm_mul_epi32(hi, bv); 338 lo = _mm_add_epi64(lo, mulround); 339 hi = _mm_add_epi64(hi, mulround); 340 lo = _mm_srli_epi64(lo, 13); 341 hi = _mm_slli_epi64(hi, 32 - 13); 342 v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC)); 343 _mm_store_si128((__m128i *) & (c2[i]), v); 344 } 345 for (; i < len; ++i) { 346 OPJ_INT32 r = c0[i]; 347 OPJ_INT32 g = c1[i]; 348 OPJ_INT32 b = c2[i]; 349 OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 350 4809) + opj_int_fix_mul(b, 934); 351 OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 352 2714) + opj_int_fix_mul(b, 4096); 353 OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 354 3430) - opj_int_fix_mul(b, 666); 355 c0[i] = y; 356 c1[i] = u; 357 c2[i] = v; 358 } 359 } 360 #else 361 void opj_mct_encode_real( 362 OPJ_INT32* OPJ_RESTRICT c0, 363 OPJ_INT32* OPJ_RESTRICT c1, 364 OPJ_INT32* OPJ_RESTRICT c2, 365 OPJ_SIZE_T n) 366 { 367 OPJ_UINT32 i; 368 for (i = 0; i < n; ++i) { 369 OPJ_INT32 r = c0[i]; 370 OPJ_INT32 g = c1[i]; 371 OPJ_INT32 b = c2[i]; 372 OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 373 4809) + opj_int_fix_mul(b, 934); 374 OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 375 2714) + opj_int_fix_mul(b, 4096); 376 OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 377 3430) - opj_int_fix_mul(b, 666); 378 c0[i] = y; 379 c1[i] = u; 380 c2[i] = v; 381 } 382 } 383 #endif 384 385 /* <summary> */ 386 /* Inverse irreversible MCT. */ 387 /* </summary> */ 388 void opj_mct_decode_real( 389 OPJ_FLOAT32* OPJ_RESTRICT c0, 390 OPJ_FLOAT32* OPJ_RESTRICT c1, 391 OPJ_FLOAT32* OPJ_RESTRICT c2, 392 OPJ_SIZE_T n) 393 { 394 OPJ_UINT32 i; 395 #ifdef USE_SSE 396 __m128 vrv, vgu, vgv, vbu; 397 vrv = _mm_set1_ps(1.402f); 398 vgu = _mm_set1_ps(0.34413f); 399 vgv = _mm_set1_ps(0.71414f); 400 vbu = _mm_set1_ps(1.772f); 401 for (i = 0; i < (n >> 3); ++i) { 402 __m128 vy, vu, vv; 403 __m128 vr, vg, vb; 404 405 vy = _mm_load_ps(c0); 406 vu = _mm_load_ps(c1); 407 vv = _mm_load_ps(c2); 408 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); 409 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv)); 410 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); 411 _mm_store_ps(c0, vr); 412 _mm_store_ps(c1, vg); 413 _mm_store_ps(c2, vb); 414 c0 += 4; 415 c1 += 4; 416 c2 += 4; 417 418 vy = _mm_load_ps(c0); 419 vu = _mm_load_ps(c1); 420 vv = _mm_load_ps(c2); 421 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); 422 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv)); 423 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); 424 _mm_store_ps(c0, vr); 425 _mm_store_ps(c1, vg); 426 _mm_store_ps(c2, vb); 427 c0 += 4; 428 c1 += 4; 429 c2 += 4; 430 } 431 n &= 7; 432 #endif 433 for (i = 0; i < n; ++i) { 434 OPJ_FLOAT32 y = c0[i]; 435 OPJ_FLOAT32 u = c1[i]; 436 OPJ_FLOAT32 v = c2[i]; 437 OPJ_FLOAT32 r = y + (v * 1.402f); 438 OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f)); 439 OPJ_FLOAT32 b = y + (u * 1.772f); 440 c0[i] = r; 441 c1[i] = g; 442 c2[i] = b; 443 } 444 } 445 446 /* <summary> */ 447 /* Get norm of basis function of irreversible MCT. */ 448 /* </summary> */ 449 OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno) 450 { 451 return opj_mct_norms_real[compno]; 452 } 453 454 455 OPJ_BOOL opj_mct_encode_custom( 456 OPJ_BYTE * pCodingdata, 457 OPJ_SIZE_T n, 458 OPJ_BYTE ** pData, 459 OPJ_UINT32 pNbComp, 460 OPJ_UINT32 isSigned) 461 { 462 OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata; 463 OPJ_SIZE_T i; 464 OPJ_UINT32 j; 465 OPJ_UINT32 k; 466 OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp; 467 OPJ_INT32 * lCurrentData = 00; 468 OPJ_INT32 * lCurrentMatrix = 00; 469 OPJ_INT32 ** lData = (OPJ_INT32 **) pData; 470 OPJ_UINT32 lMultiplicator = 1 << 13; 471 OPJ_INT32 * lMctPtr; 472 473 OPJ_ARG_NOT_USED(isSigned); 474 475 lCurrentData = (OPJ_INT32 *) opj_malloc((pNbComp + lNbMatCoeff) * sizeof( 476 OPJ_INT32)); 477 if (! lCurrentData) { 478 return OPJ_FALSE; 479 } 480 481 lCurrentMatrix = lCurrentData + pNbComp; 482 483 for (i = 0; i < lNbMatCoeff; ++i) { 484 lCurrentMatrix[i] = (OPJ_INT32)(*(lMct++) * (OPJ_FLOAT32)lMultiplicator); 485 } 486 487 for (i = 0; i < n; ++i) { 488 lMctPtr = lCurrentMatrix; 489 for (j = 0; j < pNbComp; ++j) { 490 lCurrentData[j] = (*(lData[j])); 491 } 492 493 for (j = 0; j < pNbComp; ++j) { 494 *(lData[j]) = 0; 495 for (k = 0; k < pNbComp; ++k) { 496 *(lData[j]) += opj_int_fix_mul(*lMctPtr, lCurrentData[k]); 497 ++lMctPtr; 498 } 499 500 ++lData[j]; 501 } 502 } 503 504 opj_free(lCurrentData); 505 506 return OPJ_TRUE; 507 } 508 509 OPJ_BOOL opj_mct_decode_custom( 510 OPJ_BYTE * pDecodingData, 511 OPJ_SIZE_T n, 512 OPJ_BYTE ** pData, 513 OPJ_UINT32 pNbComp, 514 OPJ_UINT32 isSigned) 515 { 516 OPJ_FLOAT32 * lMct; 517 OPJ_SIZE_T i; 518 OPJ_UINT32 j; 519 OPJ_UINT32 k; 520 521 OPJ_FLOAT32 * lCurrentData = 00; 522 OPJ_FLOAT32 * lCurrentResult = 00; 523 OPJ_FLOAT32 ** lData = (OPJ_FLOAT32 **) pData; 524 525 OPJ_ARG_NOT_USED(isSigned); 526 527 lCurrentData = (OPJ_FLOAT32 *) opj_malloc(2 * pNbComp * sizeof(OPJ_FLOAT32)); 528 if (! lCurrentData) { 529 return OPJ_FALSE; 530 } 531 lCurrentResult = lCurrentData + pNbComp; 532 533 for (i = 0; i < n; ++i) { 534 lMct = (OPJ_FLOAT32 *) pDecodingData; 535 for (j = 0; j < pNbComp; ++j) { 536 lCurrentData[j] = (OPJ_FLOAT32)(*(lData[j])); 537 } 538 for (j = 0; j < pNbComp; ++j) { 539 lCurrentResult[j] = 0; 540 for (k = 0; k < pNbComp; ++k) { 541 lCurrentResult[j] += *(lMct++) * lCurrentData[k]; 542 } 543 *(lData[j]++) = (OPJ_FLOAT32)(lCurrentResult[j]); 544 } 545 } 546 opj_free(lCurrentData); 547 return OPJ_TRUE; 548 } 549 550 void opj_calculate_norms(OPJ_FLOAT64 * pNorms, 551 OPJ_UINT32 pNbComps, 552 OPJ_FLOAT32 * pMatrix) 553 { 554 OPJ_UINT32 i, j, lIndex; 555 OPJ_FLOAT32 lCurrentValue; 556 OPJ_FLOAT64 * lNorms = (OPJ_FLOAT64 *) pNorms; 557 OPJ_FLOAT32 * lMatrix = (OPJ_FLOAT32 *) pMatrix; 558 559 for (i = 0; i < pNbComps; ++i) { 560 lNorms[i] = 0; 561 lIndex = i; 562 563 for (j = 0; j < pNbComps; ++j) { 564 lCurrentValue = lMatrix[lIndex]; 565 lIndex += pNbComps; 566 lNorms[i] += lCurrentValue * lCurrentValue; 567 } 568 lNorms[i] = sqrt(lNorms[i]); 569 } 570 } 571