1 /* 2 * The copyright in this software is being made available under the 2-clauses 3 * BSD License, included below. This software may be subject to other third 4 * party and contributor rights, including patent rights, and no such rights 5 * are granted under this license. 6 * 7 * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium 8 * Copyright (c) 2002-2014, Professor Benoit Macq 9 * Copyright (c) 2001-2003, David Janssens 10 * Copyright (c) 2002-2003, Yannick Verschueren 11 * Copyright (c) 2003-2007, Francois-Olivier Devaux 12 * Copyright (c) 2003-2014, Antonin Descampe 13 * Copyright (c) 2005, Herve Drolon, FreeImage Team 14 * Copyright (c) 2008, 2011-2012, Centre National d'Etudes Spatiales (CNES), FR 15 * Copyright (c) 2012, CS Systemes d'Information, France 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions 20 * are met: 21 * 1. Redistributions of source code must retain the above copyright 22 * notice, this list of conditions and the following disclaimer. 23 * 2. Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' 28 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 #ifdef __SSE__ 41 #include <xmmintrin.h> 42 #endif 43 #ifdef __SSE2__ 44 #include <emmintrin.h> 45 #endif 46 #ifdef __SSE4_1__ 47 #include <smmintrin.h> 48 #endif 49 50 #include "opj_includes.h" 51 52 /* <summary> */ 53 /* This table contains the norms of the basis function of the reversible MCT. */ 54 /* </summary> */ 55 static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 }; 56 57 /* <summary> */ 58 /* This table contains the norms of the basis function of the irreversible MCT. */ 59 /* </summary> */ 60 static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 }; 61 62 const OPJ_FLOAT64 * opj_mct_get_mct_norms () 63 { 64 return opj_mct_norms; 65 } 66 67 const OPJ_FLOAT64 * opj_mct_get_mct_norms_real () 68 { 69 return opj_mct_norms_real; 70 } 71 72 /* <summary> */ 73 /* Foward reversible MCT. */ 74 /* </summary> */ 75 #ifdef __SSE2__ 76 void opj_mct_encode( 77 OPJ_INT32* restrict c0, 78 OPJ_INT32* restrict c1, 79 OPJ_INT32* restrict c2, 80 OPJ_UINT32 n) 81 { 82 OPJ_SIZE_T i; 83 const OPJ_SIZE_T len = n; 84 85 for(i = 0; i < (len & ~3U); i += 4) { 86 __m128i y, u, v; 87 __m128i r = _mm_load_si128((const __m128i *)&(c0[i])); 88 __m128i g = _mm_load_si128((const __m128i *)&(c1[i])); 89 __m128i b = _mm_load_si128((const __m128i *)&(c2[i])); 90 y = _mm_add_epi32(g, g); 91 y = _mm_add_epi32(y, b); 92 y = _mm_add_epi32(y, r); 93 y = _mm_srai_epi32(y, 2); 94 u = _mm_sub_epi32(b, g); 95 v = _mm_sub_epi32(r, g); 96 _mm_store_si128((__m128i *)&(c0[i]), y); 97 _mm_store_si128((__m128i *)&(c1[i]), u); 98 _mm_store_si128((__m128i *)&(c2[i]), v); 99 } 100 101 for(; i < len; ++i) { 102 OPJ_INT32 r = c0[i]; 103 OPJ_INT32 g = c1[i]; 104 OPJ_INT32 b = c2[i]; 105 OPJ_INT32 y = (r + (g * 2) + b) >> 2; 106 OPJ_INT32 u = b - g; 107 OPJ_INT32 v = r - g; 108 c0[i] = y; 109 c1[i] = u; 110 c2[i] = v; 111 } 112 } 113 #else 114 void opj_mct_encode( 115 OPJ_INT32* restrict c0, 116 OPJ_INT32* restrict c1, 117 OPJ_INT32* restrict c2, 118 OPJ_UINT32 n) 119 { 120 OPJ_SIZE_T i; 121 const OPJ_SIZE_T len = n; 122 123 for(i = 0; i < len; ++i) { 124 OPJ_INT32 r = c0[i]; 125 OPJ_INT32 g = c1[i]; 126 OPJ_INT32 b = c2[i]; 127 OPJ_INT32 y = (r + (g * 2) + b) >> 2; 128 OPJ_INT32 u = b - g; 129 OPJ_INT32 v = r - g; 130 c0[i] = y; 131 c1[i] = u; 132 c2[i] = v; 133 } 134 } 135 #endif 136 137 /* <summary> */ 138 /* Inverse reversible MCT. */ 139 /* </summary> */ 140 #ifdef __SSE2__ 141 void opj_mct_decode( 142 OPJ_INT32* restrict c0, 143 OPJ_INT32* restrict c1, 144 OPJ_INT32* restrict c2, 145 OPJ_UINT32 n) 146 { 147 OPJ_SIZE_T i; 148 const OPJ_SIZE_T len = n; 149 150 for(i = 0; i < (len & ~3U); i += 4) { 151 __m128i r, g, b; 152 __m128i y = _mm_load_si128((const __m128i *)&(c0[i])); 153 __m128i u = _mm_load_si128((const __m128i *)&(c1[i])); 154 __m128i v = _mm_load_si128((const __m128i *)&(c2[i])); 155 g = y; 156 g = _mm_sub_epi32(g, _mm_srai_epi32(_mm_add_epi32(u, v), 2)); 157 r = _mm_add_epi32(v, g); 158 b = _mm_add_epi32(u, g); 159 _mm_store_si128((__m128i *)&(c0[i]), r); 160 _mm_store_si128((__m128i *)&(c1[i]), g); 161 _mm_store_si128((__m128i *)&(c2[i]), b); 162 } 163 for (; i < len; ++i) { 164 OPJ_INT32 y = c0[i]; 165 OPJ_INT32 u = c1[i]; 166 OPJ_INT32 v = c2[i]; 167 OPJ_INT32 g = y - ((u + v) >> 2); 168 OPJ_INT32 r = v + g; 169 OPJ_INT32 b = u + g; 170 c0[i] = r; 171 c1[i] = g; 172 c2[i] = b; 173 } 174 } 175 #else 176 void opj_mct_decode( 177 OPJ_INT32* restrict c0, 178 OPJ_INT32* restrict c1, 179 OPJ_INT32* restrict c2, 180 OPJ_UINT32 n) 181 { 182 OPJ_UINT32 i; 183 for (i = 0; i < n; ++i) { 184 OPJ_INT32 y = c0[i]; 185 OPJ_INT32 u = c1[i]; 186 OPJ_INT32 v = c2[i]; 187 OPJ_INT32 g = y - ((u + v) >> 2); 188 OPJ_INT32 r = v + g; 189 OPJ_INT32 b = u + g; 190 c0[i] = r; 191 c1[i] = g; 192 c2[i] = b; 193 } 194 } 195 #endif 196 197 /* <summary> */ 198 /* Get norm of basis function of reversible MCT. */ 199 /* </summary> */ 200 OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) { 201 return opj_mct_norms[compno]; 202 } 203 204 /* <summary> */ 205 /* Foward irreversible MCT. */ 206 /* </summary> */ 207 #ifdef __SSE4_1__ 208 void opj_mct_encode_real( 209 OPJ_INT32* restrict c0, 210 OPJ_INT32* restrict c1, 211 OPJ_INT32* restrict c2, 212 OPJ_UINT32 n) 213 { 214 OPJ_SIZE_T i; 215 const OPJ_SIZE_T len = n; 216 217 const __m128i ry = _mm_set1_epi32(2449); 218 const __m128i gy = _mm_set1_epi32(4809); 219 const __m128i by = _mm_set1_epi32(934); 220 const __m128i ru = _mm_set1_epi32(1382); 221 const __m128i gu = _mm_set1_epi32(2714); 222 /* const __m128i bu = _mm_set1_epi32(4096); */ 223 /* const __m128i rv = _mm_set1_epi32(4096); */ 224 const __m128i gv = _mm_set1_epi32(3430); 225 const __m128i bv = _mm_set1_epi32(666); 226 const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096), _MM_SHUFFLE(1, 0, 1, 0)); 227 228 for(i = 0; i < (len & ~3U); i += 4) { 229 __m128i lo, hi; 230 __m128i y, u, v; 231 __m128i r = _mm_load_si128((const __m128i *)&(c0[i])); 232 __m128i g = _mm_load_si128((const __m128i *)&(c1[i])); 233 __m128i b = _mm_load_si128((const __m128i *)&(c2[i])); 234 235 lo = r; 236 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); 237 lo = _mm_mul_epi32(lo, ry); 238 hi = _mm_mul_epi32(hi, ry); 239 lo = _mm_add_epi64(lo, mulround); 240 hi = _mm_add_epi64(hi, mulround); 241 lo = _mm_srli_epi64(lo, 13); 242 hi = _mm_slli_epi64(hi, 32-13); 243 y = _mm_blend_epi16(lo, hi, 0xCC); 244 245 lo = g; 246 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); 247 lo = _mm_mul_epi32(lo, gy); 248 hi = _mm_mul_epi32(hi, gy); 249 lo = _mm_add_epi64(lo, mulround); 250 hi = _mm_add_epi64(hi, mulround); 251 lo = _mm_srli_epi64(lo, 13); 252 hi = _mm_slli_epi64(hi, 32-13); 253 y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC)); 254 255 lo = b; 256 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); 257 lo = _mm_mul_epi32(lo, by); 258 hi = _mm_mul_epi32(hi, by); 259 lo = _mm_add_epi64(lo, mulround); 260 hi = _mm_add_epi64(hi, mulround); 261 lo = _mm_srli_epi64(lo, 13); 262 hi = _mm_slli_epi64(hi, 32-13); 263 y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC)); 264 _mm_store_si128((__m128i *)&(c0[i]), y); 265 266 /*lo = b; 267 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); 268 lo = _mm_mul_epi32(lo, mulround); 269 hi = _mm_mul_epi32(hi, mulround);*/ 270 lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0))); 271 hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1))); 272 lo = _mm_slli_epi64(lo, 12); 273 hi = _mm_slli_epi64(hi, 12); 274 lo = _mm_add_epi64(lo, mulround); 275 hi = _mm_add_epi64(hi, mulround); 276 lo = _mm_srli_epi64(lo, 13); 277 hi = _mm_slli_epi64(hi, 32-13); 278 u = _mm_blend_epi16(lo, hi, 0xCC); 279 280 lo = r; 281 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); 282 lo = _mm_mul_epi32(lo, ru); 283 hi = _mm_mul_epi32(hi, ru); 284 lo = _mm_add_epi64(lo, mulround); 285 hi = _mm_add_epi64(hi, mulround); 286 lo = _mm_srli_epi64(lo, 13); 287 hi = _mm_slli_epi64(hi, 32-13); 288 u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC)); 289 290 lo = g; 291 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); 292 lo = _mm_mul_epi32(lo, gu); 293 hi = _mm_mul_epi32(hi, gu); 294 lo = _mm_add_epi64(lo, mulround); 295 hi = _mm_add_epi64(hi, mulround); 296 lo = _mm_srli_epi64(lo, 13); 297 hi = _mm_slli_epi64(hi, 32-13); 298 u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC)); 299 _mm_store_si128((__m128i *)&(c1[i]), u); 300 301 /*lo = r; 302 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); 303 lo = _mm_mul_epi32(lo, mulround); 304 hi = _mm_mul_epi32(hi, mulround);*/ 305 lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0))); 306 hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1))); 307 lo = _mm_slli_epi64(lo, 12); 308 hi = _mm_slli_epi64(hi, 12); 309 lo = _mm_add_epi64(lo, mulround); 310 hi = _mm_add_epi64(hi, mulround); 311 lo = _mm_srli_epi64(lo, 13); 312 hi = _mm_slli_epi64(hi, 32-13); 313 v = _mm_blend_epi16(lo, hi, 0xCC); 314 315 lo = g; 316 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1)); 317 lo = _mm_mul_epi32(lo, gv); 318 hi = _mm_mul_epi32(hi, gv); 319 lo = _mm_add_epi64(lo, mulround); 320 hi = _mm_add_epi64(hi, mulround); 321 lo = _mm_srli_epi64(lo, 13); 322 hi = _mm_slli_epi64(hi, 32-13); 323 v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC)); 324 325 lo = b; 326 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1)); 327 lo = _mm_mul_epi32(lo, bv); 328 hi = _mm_mul_epi32(hi, bv); 329 lo = _mm_add_epi64(lo, mulround); 330 hi = _mm_add_epi64(hi, mulround); 331 lo = _mm_srli_epi64(lo, 13); 332 hi = _mm_slli_epi64(hi, 32-13); 333 v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC)); 334 _mm_store_si128((__m128i *)&(c2[i]), v); 335 } 336 for(; i < len; ++i) { 337 OPJ_INT32 r = c0[i]; 338 OPJ_INT32 g = c1[i]; 339 OPJ_INT32 b = c2[i]; 340 OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934); 341 OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096); 342 OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666); 343 c0[i] = y; 344 c1[i] = u; 345 c2[i] = v; 346 } 347 } 348 #else 349 void opj_mct_encode_real( 350 OPJ_INT32* restrict c0, 351 OPJ_INT32* restrict c1, 352 OPJ_INT32* restrict c2, 353 OPJ_UINT32 n) 354 { 355 OPJ_UINT32 i; 356 for(i = 0; i < n; ++i) { 357 OPJ_INT32 r = c0[i]; 358 OPJ_INT32 g = c1[i]; 359 OPJ_INT32 b = c2[i]; 360 OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934); 361 OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096); 362 OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666); 363 c0[i] = y; 364 c1[i] = u; 365 c2[i] = v; 366 } 367 } 368 #endif 369 370 /* <summary> */ 371 /* Inverse irreversible MCT. */ 372 /* </summary> */ 373 void opj_mct_decode_real( 374 OPJ_FLOAT32* restrict c0, 375 OPJ_FLOAT32* restrict c1, 376 OPJ_FLOAT32* restrict c2, 377 OPJ_UINT32 n) 378 { 379 OPJ_UINT32 i; 380 #ifdef __SSE__ 381 __m128 vrv, vgu, vgv, vbu; 382 vrv = _mm_set1_ps(1.402f); 383 vgu = _mm_set1_ps(0.34413f); 384 vgv = _mm_set1_ps(0.71414f); 385 vbu = _mm_set1_ps(1.772f); 386 for (i = 0; i < (n >> 3); ++i) { 387 __m128 vy, vu, vv; 388 __m128 vr, vg, vb; 389 390 vy = _mm_load_ps(c0); 391 vu = _mm_load_ps(c1); 392 vv = _mm_load_ps(c2); 393 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); 394 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv)); 395 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); 396 _mm_store_ps(c0, vr); 397 _mm_store_ps(c1, vg); 398 _mm_store_ps(c2, vb); 399 c0 += 4; 400 c1 += 4; 401 c2 += 4; 402 403 vy = _mm_load_ps(c0); 404 vu = _mm_load_ps(c1); 405 vv = _mm_load_ps(c2); 406 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv)); 407 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv)); 408 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu)); 409 _mm_store_ps(c0, vr); 410 _mm_store_ps(c1, vg); 411 _mm_store_ps(c2, vb); 412 c0 += 4; 413 c1 += 4; 414 c2 += 4; 415 } 416 n &= 7; 417 #endif 418 for(i = 0; i < n; ++i) { 419 OPJ_FLOAT32 y = c0[i]; 420 OPJ_FLOAT32 u = c1[i]; 421 OPJ_FLOAT32 v = c2[i]; 422 OPJ_FLOAT32 r = y + (v * 1.402f); 423 OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f)); 424 OPJ_FLOAT32 b = y + (u * 1.772f); 425 c0[i] = r; 426 c1[i] = g; 427 c2[i] = b; 428 } 429 } 430 431 /* <summary> */ 432 /* Get norm of basis function of irreversible MCT. */ 433 /* </summary> */ 434 OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno) { 435 return opj_mct_norms_real[compno]; 436 } 437 438 439 OPJ_BOOL opj_mct_encode_custom( 440 OPJ_BYTE * pCodingdata, 441 OPJ_UINT32 n, 442 OPJ_BYTE ** pData, 443 OPJ_UINT32 pNbComp, 444 OPJ_UINT32 isSigned) 445 { 446 OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata; 447 OPJ_UINT32 i; 448 OPJ_UINT32 j; 449 OPJ_UINT32 k; 450 OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp; 451 OPJ_INT32 * lCurrentData = 00; 452 OPJ_INT32 * lCurrentMatrix = 00; 453 OPJ_INT32 ** lData = (OPJ_INT32 **) pData; 454 OPJ_UINT32 lMultiplicator = 1 << 13; 455 OPJ_INT32 * lMctPtr; 456 457 OPJ_ARG_NOT_USED(isSigned); 458 459 lCurrentData = (OPJ_INT32 *) opj_malloc((pNbComp + lNbMatCoeff) * sizeof(OPJ_INT32)); 460 if (! lCurrentData) { 461 return OPJ_FALSE; 462 } 463 464 lCurrentMatrix = lCurrentData + pNbComp; 465 466 for (i =0;i<lNbMatCoeff;++i) { 467 lCurrentMatrix[i] = (OPJ_INT32) (*(lMct++) * (OPJ_FLOAT32)lMultiplicator); 468 } 469 470 for (i = 0; i < n; ++i) { 471 lMctPtr = lCurrentMatrix; 472 for (j=0;j<pNbComp;++j) { 473 lCurrentData[j] = (*(lData[j])); 474 } 475 476 for (j=0;j<pNbComp;++j) { 477 *(lData[j]) = 0; 478 for (k=0;k<pNbComp;++k) { 479 *(lData[j]) += opj_int_fix_mul(*lMctPtr, lCurrentData[k]); 480 ++lMctPtr; 481 } 482 483 ++lData[j]; 484 } 485 } 486 487 opj_free(lCurrentData); 488 489 return OPJ_TRUE; 490 } 491 492 OPJ_BOOL opj_mct_decode_custom( 493 OPJ_BYTE * pDecodingData, 494 OPJ_UINT32 n, 495 OPJ_BYTE ** pData, 496 OPJ_UINT32 pNbComp, 497 OPJ_UINT32 isSigned) 498 { 499 OPJ_FLOAT32 * lMct; 500 OPJ_UINT32 i; 501 OPJ_UINT32 j; 502 OPJ_UINT32 k; 503 504 OPJ_FLOAT32 * lCurrentData = 00; 505 OPJ_FLOAT32 * lCurrentResult = 00; 506 OPJ_FLOAT32 ** lData = (OPJ_FLOAT32 **) pData; 507 508 OPJ_ARG_NOT_USED(isSigned); 509 510 lCurrentData = (OPJ_FLOAT32 *) opj_malloc (2 * pNbComp * sizeof(OPJ_FLOAT32)); 511 if (! lCurrentData) { 512 return OPJ_FALSE; 513 } 514 lCurrentResult = lCurrentData + pNbComp; 515 516 for (i = 0; i < n; ++i) { 517 lMct = (OPJ_FLOAT32 *) pDecodingData; 518 for (j=0;j<pNbComp;++j) { 519 lCurrentData[j] = (OPJ_FLOAT32) (*(lData[j])); 520 } 521 for (j=0;j<pNbComp;++j) { 522 lCurrentResult[j] = 0; 523 for (k=0;k<pNbComp;++k) { 524 lCurrentResult[j] += *(lMct++) * lCurrentData[k]; 525 } 526 *(lData[j]++) = (OPJ_FLOAT32) (lCurrentResult[j]); 527 } 528 } 529 opj_free(lCurrentData); 530 return OPJ_TRUE; 531 } 532 533 void opj_calculate_norms( OPJ_FLOAT64 * pNorms, 534 OPJ_UINT32 pNbComps, 535 OPJ_FLOAT32 * pMatrix) 536 { 537 OPJ_UINT32 i,j,lIndex; 538 OPJ_FLOAT32 lCurrentValue; 539 OPJ_FLOAT64 * lNorms = (OPJ_FLOAT64 *) pNorms; 540 OPJ_FLOAT32 * lMatrix = (OPJ_FLOAT32 *) pMatrix; 541 542 for (i=0;i<pNbComps;++i) { 543 lNorms[i] = 0; 544 lIndex = i; 545 546 for (j=0;j<pNbComps;++j) { 547 lCurrentValue = lMatrix[lIndex]; 548 lIndex += pNbComps; 549 lNorms[i] += lCurrentValue * lCurrentValue; 550 } 551 lNorms[i] = sqrt(lNorms[i]); 552 } 553 } 554