1 /* vim: set ts=8 sw=8 noexpandtab: */ 2 // qcms 3 // Copyright (C) 2009 Mozilla Corporation 4 // Copyright (C) 1998-2007 Marti Maria 5 // 6 // Permission is hereby granted, free of charge, to any person obtaining 7 // a copy of this software and associated documentation files (the "Software"), 8 // to deal in the Software without restriction, including without limitation 9 // the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 // and/or sell copies of the Software, and to permit persons to whom the Software 11 // is furnished to do so, subject to the following conditions: 12 // 13 // The above copyright notice and this permission notice shall be included in 14 // all copies or substantial portions of the Software. 15 // 16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 18 // THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 // LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 24 #include <stdlib.h> 25 #include <math.h> 26 #include <assert.h> 27 #include <string.h> //memcpy 28 #include "qcmsint.h" 29 #include "chain.h" 30 #include "matrix.h" 31 #include "transform_util.h" 32 33 /* for MSVC, GCC, Intel, and Sun compilers */ 34 #if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_M_AMD64) || defined(__x86_64__) || defined(__x86_64) 35 #define X86 36 #endif /* _M_IX86 || __i386__ || __i386 || _M_AMD64 || __x86_64__ || __x86_64 */ 37 38 // Build a White point, primary chromas transfer matrix from RGB to CIE XYZ 39 // This is just an approximation, I am not handling all the non-linear 40 // aspects of the RGB to XYZ process, and assumming that the gamma correction 41 // has transitive property in the tranformation chain. 42 // 43 // the alghoritm: 44 // 45 // - First I build the absolute conversion matrix using 46 // primaries in XYZ. This matrix is next inverted 47 // - Then I eval the source white point across this matrix 48 // obtaining the coeficients of the transformation 49 // - Then, I apply these coeficients to the original matrix 50 static struct matrix build_RGB_to_XYZ_transfer_matrix(qcms_CIE_xyY white, qcms_CIE_xyYTRIPLE primrs) 51 { 52 struct matrix primaries; 53 struct matrix primaries_invert; 54 struct matrix result; 55 struct vector white_point; 56 struct vector coefs; 57 58 double xn, yn; 59 double xr, yr; 60 double xg, yg; 61 double xb, yb; 62 63 xn = white.x; 64 yn = white.y; 65 66 if (yn == 0.0) 67 return matrix_invalid(); 68 69 xr = primrs.red.x; 70 yr = primrs.red.y; 71 xg = primrs.green.x; 72 yg = primrs.green.y; 73 xb = primrs.blue.x; 74 yb = primrs.blue.y; 75 76 primaries.m[0][0] = xr; 77 primaries.m[0][1] = xg; 78 primaries.m[0][2] = xb; 79 80 primaries.m[1][0] = yr; 81 primaries.m[1][1] = yg; 82 primaries.m[1][2] = yb; 83 84 primaries.m[2][0] = 1 - xr - yr; 85 primaries.m[2][1] = 1 - xg - yg; 86 primaries.m[2][2] = 1 - xb - yb; 87 primaries.invalid = false; 88 89 white_point.v[0] = xn/yn; 90 white_point.v[1] = 1.; 91 white_point.v[2] = (1.0-xn-yn)/yn; 92 93 primaries_invert = matrix_invert(primaries); 94 95 coefs = matrix_eval(primaries_invert, white_point); 96 97 result.m[0][0] = coefs.v[0]*xr; 98 result.m[0][1] = coefs.v[1]*xg; 99 result.m[0][2] = coefs.v[2]*xb; 100 101 result.m[1][0] = coefs.v[0]*yr; 102 result.m[1][1] = coefs.v[1]*yg; 103 result.m[1][2] = coefs.v[2]*yb; 104 105 result.m[2][0] = coefs.v[0]*(1.-xr-yr); 106 result.m[2][1] = coefs.v[1]*(1.-xg-yg); 107 result.m[2][2] = coefs.v[2]*(1.-xb-yb); 108 result.invalid = primaries_invert.invalid; 109 110 return result; 111 } 112 113 struct CIE_XYZ { 114 double X; 115 double Y; 116 double Z; 117 }; 118 119 /* CIE Illuminant D50 */ 120 static const struct CIE_XYZ D50_XYZ = { 121 0.9642, 122 1.0000, 123 0.8249 124 }; 125 126 /* from lcms: xyY2XYZ() 127 * corresponds to argyll: icmYxy2XYZ() */ 128 static struct CIE_XYZ xyY2XYZ(qcms_CIE_xyY source) 129 { 130 struct CIE_XYZ dest; 131 dest.X = (source.x / source.y) * source.Y; 132 dest.Y = source.Y; 133 dest.Z = ((1 - source.x - source.y) / source.y) * source.Y; 134 return dest; 135 } 136 137 /* from lcms: ComputeChromaticAdaption */ 138 // Compute chromatic adaption matrix using chad as cone matrix 139 static struct matrix 140 compute_chromatic_adaption(struct CIE_XYZ source_white_point, 141 struct CIE_XYZ dest_white_point, 142 struct matrix chad) 143 { 144 struct matrix chad_inv; 145 struct vector cone_source_XYZ, cone_source_rgb; 146 struct vector cone_dest_XYZ, cone_dest_rgb; 147 struct matrix cone, tmp; 148 149 tmp = chad; 150 chad_inv = matrix_invert(tmp); 151 152 cone_source_XYZ.v[0] = source_white_point.X; 153 cone_source_XYZ.v[1] = source_white_point.Y; 154 cone_source_XYZ.v[2] = source_white_point.Z; 155 156 cone_dest_XYZ.v[0] = dest_white_point.X; 157 cone_dest_XYZ.v[1] = dest_white_point.Y; 158 cone_dest_XYZ.v[2] = dest_white_point.Z; 159 160 cone_source_rgb = matrix_eval(chad, cone_source_XYZ); 161 cone_dest_rgb = matrix_eval(chad, cone_dest_XYZ); 162 163 cone.m[0][0] = cone_dest_rgb.v[0]/cone_source_rgb.v[0]; 164 cone.m[0][1] = 0; 165 cone.m[0][2] = 0; 166 cone.m[1][0] = 0; 167 cone.m[1][1] = cone_dest_rgb.v[1]/cone_source_rgb.v[1]; 168 cone.m[1][2] = 0; 169 cone.m[2][0] = 0; 170 cone.m[2][1] = 0; 171 cone.m[2][2] = cone_dest_rgb.v[2]/cone_source_rgb.v[2]; 172 cone.invalid = false; 173 174 // Normalize 175 return matrix_multiply(chad_inv, matrix_multiply(cone, chad)); 176 } 177 178 /* from lcms: cmsAdaptionMatrix */ 179 // Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll 180 // Bradford is assumed 181 static struct matrix 182 adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination) 183 { 184 #if defined (_MSC_VER) 185 #pragma warning(push) 186 /* Disable double to float truncation warning 4305 */ 187 #pragma warning(disable:4305) 188 #endif 189 struct matrix lam_rigg = {{ // Bradford matrix 190 { 0.8951, 0.2664, -0.1614 }, 191 { -0.7502, 1.7135, 0.0367 }, 192 { 0.0389, -0.0685, 1.0296 } 193 }}; 194 #if defined (_MSC_VER) 195 /* Restore warnings */ 196 #pragma warning(pop) 197 #endif 198 return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg); 199 } 200 201 /* from lcms: cmsAdaptMatrixToD50 */ 202 static struct matrix adapt_matrix_to_D50(struct matrix r, qcms_CIE_xyY source_white_pt) 203 { 204 struct CIE_XYZ Dn; 205 struct matrix Bradford; 206 207 if (source_white_pt.y == 0.0) 208 return matrix_invalid(); 209 210 Dn = xyY2XYZ(source_white_pt); 211 212 Bradford = adaption_matrix(Dn, D50_XYZ); 213 return matrix_multiply(Bradford, r); 214 } 215 216 qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcms_CIE_xyYTRIPLE primaries) 217 { 218 struct matrix colorants; 219 colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries); 220 colorants = adapt_matrix_to_D50(colorants, white_point); 221 222 if (colorants.invalid) 223 return false; 224 225 /* note: there's a transpose type of operation going on here */ 226 profile->redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0]); 227 profile->redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0]); 228 profile->redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0]); 229 230 profile->greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1]); 231 profile->greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1]); 232 profile->greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1]); 233 234 profile->blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2]); 235 profile->blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2]); 236 profile->blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2]); 237 238 return true; 239 } 240 241 #if 0 242 static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 243 { 244 const int r_out = output_format.r; 245 const int b_out = output_format.b; 246 247 int i; 248 float (*mat)[4] = transform->matrix; 249 for (i=0; i<length; i++) { 250 unsigned char device_r = *src++; 251 unsigned char device_g = *src++; 252 unsigned char device_b = *src++; 253 254 float linear_r = transform->input_gamma_table_r[device_r]; 255 float linear_g = transform->input_gamma_table_g[device_g]; 256 float linear_b = transform->input_gamma_table_b[device_b]; 257 258 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; 259 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; 260 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; 261 262 float out_device_r = pow(out_linear_r, transform->out_gamma_r); 263 float out_device_g = pow(out_linear_g, transform->out_gamma_g); 264 float out_device_b = pow(out_linear_b, transform->out_gamma_b); 265 266 dest[r_out] = clamp_u8(out_device_r*255); 267 dest[1] = clamp_u8(out_device_g*255); 268 dest[b_out] = clamp_u8(out_device_b*255); 269 dest += 3; 270 } 271 } 272 #endif 273 274 static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 275 { 276 const int r_out = output_format.r; 277 const int b_out = output_format.b; 278 279 unsigned int i; 280 for (i = 0; i < length; i++) { 281 float out_device_r, out_device_g, out_device_b; 282 unsigned char device = *src++; 283 284 float linear = transform->input_gamma_table_gray[device]; 285 286 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); 287 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); 288 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); 289 290 dest[r_out] = clamp_u8(out_device_r*255); 291 dest[1] = clamp_u8(out_device_g*255); 292 dest[b_out] = clamp_u8(out_device_b*255); 293 dest += 3; 294 } 295 } 296 297 /* Alpha is not corrected. 298 A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If 299 RGB Is?" Tech Memo 17 (December 14, 1998). 300 See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf 301 */ 302 303 static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 304 { 305 const int r_out = output_format.r; 306 const int b_out = output_format.b; 307 308 unsigned int i; 309 for (i = 0; i < length; i++) { 310 float out_device_r, out_device_g, out_device_b; 311 unsigned char device = *src++; 312 unsigned char alpha = *src++; 313 314 float linear = transform->input_gamma_table_gray[device]; 315 316 out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); 317 out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); 318 out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); 319 320 dest[r_out] = clamp_u8(out_device_r*255); 321 dest[1] = clamp_u8(out_device_g*255); 322 dest[b_out] = clamp_u8(out_device_b*255); 323 dest[3] = alpha; 324 dest += 4; 325 } 326 } 327 328 329 static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 330 { 331 const int r_out = output_format.r; 332 const int b_out = output_format.b; 333 334 unsigned int i; 335 for (i = 0; i < length; i++) { 336 unsigned char device = *src++; 337 uint16_t gray; 338 339 float linear = transform->input_gamma_table_gray[device]; 340 341 /* we could round here... */ 342 gray = linear * PRECACHE_OUTPUT_MAX; 343 344 dest[r_out] = transform->output_table_r->data[gray]; 345 dest[1] = transform->output_table_g->data[gray]; 346 dest[b_out] = transform->output_table_b->data[gray]; 347 dest += 3; 348 } 349 } 350 351 352 static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 353 { 354 const int r_out = output_format.r; 355 const int b_out = output_format.b; 356 357 unsigned int i; 358 for (i = 0; i < length; i++) { 359 unsigned char device = *src++; 360 unsigned char alpha = *src++; 361 uint16_t gray; 362 363 float linear = transform->input_gamma_table_gray[device]; 364 365 /* we could round here... */ 366 gray = linear * PRECACHE_OUTPUT_MAX; 367 368 dest[r_out] = transform->output_table_r->data[gray]; 369 dest[1] = transform->output_table_g->data[gray]; 370 dest[b_out] = transform->output_table_b->data[gray]; 371 dest[3] = alpha; 372 dest += 4; 373 } 374 } 375 376 static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 377 { 378 const int r_out = output_format.r; 379 const int b_out = output_format.b; 380 381 unsigned int i; 382 float (*mat)[4] = transform->matrix; 383 for (i = 0; i < length; i++) { 384 unsigned char device_r = *src++; 385 unsigned char device_g = *src++; 386 unsigned char device_b = *src++; 387 uint16_t r, g, b; 388 389 float linear_r = transform->input_gamma_table_r[device_r]; 390 float linear_g = transform->input_gamma_table_g[device_g]; 391 float linear_b = transform->input_gamma_table_b[device_b]; 392 393 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; 394 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; 395 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; 396 397 out_linear_r = clamp_float(out_linear_r); 398 out_linear_g = clamp_float(out_linear_g); 399 out_linear_b = clamp_float(out_linear_b); 400 401 /* we could round here... */ 402 r = out_linear_r * PRECACHE_OUTPUT_MAX; 403 g = out_linear_g * PRECACHE_OUTPUT_MAX; 404 b = out_linear_b * PRECACHE_OUTPUT_MAX; 405 406 dest[r_out] = transform->output_table_r->data[r]; 407 dest[1] = transform->output_table_g->data[g]; 408 dest[b_out] = transform->output_table_b->data[b]; 409 dest += 3; 410 } 411 } 412 413 static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 414 { 415 const int r_out = output_format.r; 416 const int b_out = output_format.b; 417 418 unsigned int i; 419 float (*mat)[4] = transform->matrix; 420 for (i = 0; i < length; i++) { 421 unsigned char device_r = *src++; 422 unsigned char device_g = *src++; 423 unsigned char device_b = *src++; 424 unsigned char alpha = *src++; 425 uint16_t r, g, b; 426 427 float linear_r = transform->input_gamma_table_r[device_r]; 428 float linear_g = transform->input_gamma_table_g[device_g]; 429 float linear_b = transform->input_gamma_table_b[device_b]; 430 431 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; 432 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; 433 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; 434 435 out_linear_r = clamp_float(out_linear_r); 436 out_linear_g = clamp_float(out_linear_g); 437 out_linear_b = clamp_float(out_linear_b); 438 439 /* we could round here... */ 440 r = out_linear_r * PRECACHE_OUTPUT_MAX; 441 g = out_linear_g * PRECACHE_OUTPUT_MAX; 442 b = out_linear_b * PRECACHE_OUTPUT_MAX; 443 444 dest[r_out] = transform->output_table_r->data[r]; 445 dest[1] = transform->output_table_g->data[g]; 446 dest[b_out] = transform->output_table_b->data[b]; 447 dest[3] = alpha; 448 dest += 4; 449 } 450 } 451 452 // Not used 453 /* 454 static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 455 { 456 const int r_out = output_format.r; 457 const int b_out = output_format.b; 458 459 unsigned int i; 460 int xy_len = 1; 461 int x_len = transform->grid_size; 462 int len = x_len * x_len; 463 float* r_table = transform->r_clut; 464 float* g_table = transform->g_clut; 465 float* b_table = transform->b_clut; 466 467 for (i = 0; i < length; i++) { 468 unsigned char in_r = *src++; 469 unsigned char in_g = *src++; 470 unsigned char in_b = *src++; 471 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; 472 473 int x = floor(linear_r * (transform->grid_size-1)); 474 int y = floor(linear_g * (transform->grid_size-1)); 475 int z = floor(linear_b * (transform->grid_size-1)); 476 int x_n = ceil(linear_r * (transform->grid_size-1)); 477 int y_n = ceil(linear_g * (transform->grid_size-1)); 478 int z_n = ceil(linear_b * (transform->grid_size-1)); 479 float x_d = linear_r * (transform->grid_size-1) - x; 480 float y_d = linear_g * (transform->grid_size-1) - y; 481 float z_d = linear_b * (transform->grid_size-1) - z; 482 483 float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d); 484 float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d); 485 float r_y1 = lerp(r_x1, r_x2, y_d); 486 float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d); 487 float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d); 488 float r_y2 = lerp(r_x3, r_x4, y_d); 489 float clut_r = lerp(r_y1, r_y2, z_d); 490 491 float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d); 492 float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d); 493 float g_y1 = lerp(g_x1, g_x2, y_d); 494 float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d); 495 float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d); 496 float g_y2 = lerp(g_x3, g_x4, y_d); 497 float clut_g = lerp(g_y1, g_y2, z_d); 498 499 float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d); 500 float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d); 501 float b_y1 = lerp(b_x1, b_x2, y_d); 502 float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d); 503 float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d); 504 float b_y2 = lerp(b_x3, b_x4, y_d); 505 float clut_b = lerp(b_y1, b_y2, z_d); 506 507 dest[r_out] = clamp_u8(clut_r*255.0f); 508 dest[1] = clamp_u8(clut_g*255.0f); 509 dest[b_out] = clamp_u8(clut_b*255.0f); 510 dest += 3; 511 } 512 } 513 */ 514 515 // Using lcms' tetra interpolation algorithm. 516 static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 517 { 518 const int r_out = output_format.r; 519 const int b_out = output_format.b; 520 521 unsigned int i; 522 int xy_len = 1; 523 int x_len = transform->grid_size; 524 int len = x_len * x_len; 525 float* r_table = transform->r_clut; 526 float* g_table = transform->g_clut; 527 float* b_table = transform->b_clut; 528 float c0_r, c1_r, c2_r, c3_r; 529 float c0_g, c1_g, c2_g, c3_g; 530 float c0_b, c1_b, c2_b, c3_b; 531 float clut_r, clut_g, clut_b; 532 for (i = 0; i < length; i++) { 533 unsigned char in_r = *src++; 534 unsigned char in_g = *src++; 535 unsigned char in_b = *src++; 536 unsigned char in_a = *src++; 537 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; 538 539 int x = floor(linear_r * (transform->grid_size-1)); 540 int y = floor(linear_g * (transform->grid_size-1)); 541 int z = floor(linear_b * (transform->grid_size-1)); 542 int x_n = ceil(linear_r * (transform->grid_size-1)); 543 int y_n = ceil(linear_g * (transform->grid_size-1)); 544 int z_n = ceil(linear_b * (transform->grid_size-1)); 545 float rx = linear_r * (transform->grid_size-1) - x; 546 float ry = linear_g * (transform->grid_size-1) - y; 547 float rz = linear_b * (transform->grid_size-1) - z; 548 549 c0_r = CLU(r_table, x, y, z); 550 c0_g = CLU(g_table, x, y, z); 551 c0_b = CLU(b_table, x, y, z); 552 553 if( rx >= ry ) { 554 if (ry >= rz) { //rx >= ry && ry >= rz 555 c1_r = CLU(r_table, x_n, y, z) - c0_r; 556 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z); 557 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); 558 c1_g = CLU(g_table, x_n, y, z) - c0_g; 559 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z); 560 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); 561 c1_b = CLU(b_table, x_n, y, z) - c0_b; 562 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z); 563 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); 564 } else { 565 if (rx >= rz) { //rx >= rz && rz >= ry 566 c1_r = CLU(r_table, x_n, y, z) - c0_r; 567 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); 568 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z); 569 c1_g = CLU(g_table, x_n, y, z) - c0_g; 570 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); 571 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z); 572 c1_b = CLU(b_table, x_n, y, z) - c0_b; 573 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); 574 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z); 575 } else { //rz > rx && rx >= ry 576 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n); 577 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); 578 c3_r = CLU(r_table, x, y, z_n) - c0_r; 579 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n); 580 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); 581 c3_g = CLU(g_table, x, y, z_n) - c0_g; 582 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n); 583 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); 584 c3_b = CLU(b_table, x, y, z_n) - c0_b; 585 } 586 } 587 } else { 588 if (rx >= rz) { //ry > rx && rx >= rz 589 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z); 590 c2_r = CLU(r_table, x, y_n, z) - c0_r; 591 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); 592 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z); 593 c2_g = CLU(g_table, x, y_n, z) - c0_g; 594 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); 595 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z); 596 c2_b = CLU(b_table, x, y_n, z) - c0_b; 597 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); 598 } else { 599 if (ry >= rz) { //ry >= rz && rz > rx 600 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); 601 c2_r = CLU(r_table, x, y_n, z) - c0_r; 602 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z); 603 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); 604 c2_g = CLU(g_table, x, y_n, z) - c0_g; 605 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z); 606 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); 607 c2_b = CLU(b_table, x, y_n, z) - c0_b; 608 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z); 609 } else { //rz > ry && ry > rx 610 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); 611 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n); 612 c3_r = CLU(r_table, x, y, z_n) - c0_r; 613 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); 614 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n); 615 c3_g = CLU(g_table, x, y, z_n) - c0_g; 616 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); 617 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n); 618 c3_b = CLU(b_table, x, y, z_n) - c0_b; 619 } 620 } 621 } 622 623 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz; 624 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz; 625 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz; 626 627 dest[r_out] = clamp_u8(clut_r*255.0f); 628 dest[1] = clamp_u8(clut_g*255.0f); 629 dest[b_out] = clamp_u8(clut_b*255.0f); 630 dest[3] = in_a; 631 dest += 4; 632 } 633 } 634 635 // Using lcms' tetra interpolation code. 636 static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 637 { 638 const int r_out = output_format.r; 639 const int b_out = output_format.b; 640 641 unsigned int i; 642 int xy_len = 1; 643 int x_len = transform->grid_size; 644 int len = x_len * x_len; 645 float* r_table = transform->r_clut; 646 float* g_table = transform->g_clut; 647 float* b_table = transform->b_clut; 648 float c0_r, c1_r, c2_r, c3_r; 649 float c0_g, c1_g, c2_g, c3_g; 650 float c0_b, c1_b, c2_b, c3_b; 651 float clut_r, clut_g, clut_b; 652 for (i = 0; i < length; i++) { 653 unsigned char in_r = *src++; 654 unsigned char in_g = *src++; 655 unsigned char in_b = *src++; 656 float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; 657 658 int x = floor(linear_r * (transform->grid_size-1)); 659 int y = floor(linear_g * (transform->grid_size-1)); 660 int z = floor(linear_b * (transform->grid_size-1)); 661 int x_n = ceil(linear_r * (transform->grid_size-1)); 662 int y_n = ceil(linear_g * (transform->grid_size-1)); 663 int z_n = ceil(linear_b * (transform->grid_size-1)); 664 float rx = linear_r * (transform->grid_size-1) - x; 665 float ry = linear_g * (transform->grid_size-1) - y; 666 float rz = linear_b * (transform->grid_size-1) - z; 667 668 c0_r = CLU(r_table, x, y, z); 669 c0_g = CLU(g_table, x, y, z); 670 c0_b = CLU(b_table, x, y, z); 671 672 if( rx >= ry ) { 673 if (ry >= rz) { //rx >= ry && ry >= rz 674 c1_r = CLU(r_table, x_n, y, z) - c0_r; 675 c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z); 676 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); 677 c1_g = CLU(g_table, x_n, y, z) - c0_g; 678 c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z); 679 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); 680 c1_b = CLU(b_table, x_n, y, z) - c0_b; 681 c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z); 682 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); 683 } else { 684 if (rx >= rz) { //rx >= rz && rz >= ry 685 c1_r = CLU(r_table, x_n, y, z) - c0_r; 686 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); 687 c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z); 688 c1_g = CLU(g_table, x_n, y, z) - c0_g; 689 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); 690 c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z); 691 c1_b = CLU(b_table, x_n, y, z) - c0_b; 692 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); 693 c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z); 694 } else { //rz > rx && rx >= ry 695 c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n); 696 c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); 697 c3_r = CLU(r_table, x, y, z_n) - c0_r; 698 c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n); 699 c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); 700 c3_g = CLU(g_table, x, y, z_n) - c0_g; 701 c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n); 702 c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); 703 c3_b = CLU(b_table, x, y, z_n) - c0_b; 704 } 705 } 706 } else { 707 if (rx >= rz) { //ry > rx && rx >= rz 708 c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z); 709 c2_r = CLU(r_table, x, y_n, z) - c0_r; 710 c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); 711 c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z); 712 c2_g = CLU(g_table, x, y_n, z) - c0_g; 713 c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); 714 c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z); 715 c2_b = CLU(b_table, x, y_n, z) - c0_b; 716 c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); 717 } else { 718 if (ry >= rz) { //ry >= rz && rz > rx 719 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); 720 c2_r = CLU(r_table, x, y_n, z) - c0_r; 721 c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z); 722 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); 723 c2_g = CLU(g_table, x, y_n, z) - c0_g; 724 c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z); 725 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); 726 c2_b = CLU(b_table, x, y_n, z) - c0_b; 727 c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z); 728 } else { //rz > ry && ry > rx 729 c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); 730 c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n); 731 c3_r = CLU(r_table, x, y, z_n) - c0_r; 732 c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); 733 c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n); 734 c3_g = CLU(g_table, x, y, z_n) - c0_g; 735 c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); 736 c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n); 737 c3_b = CLU(b_table, x, y, z_n) - c0_b; 738 } 739 } 740 } 741 742 clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz; 743 clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz; 744 clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz; 745 746 dest[r_out] = clamp_u8(clut_r*255.0f); 747 dest[1] = clamp_u8(clut_g*255.0f); 748 dest[b_out] = clamp_u8(clut_b*255.0f); 749 dest += 3; 750 } 751 } 752 753 static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 754 { 755 const int r_out = output_format.r; 756 const int b_out = output_format.b; 757 758 unsigned int i; 759 float (*mat)[4] = transform->matrix; 760 for (i = 0; i < length; i++) { 761 unsigned char device_r = *src++; 762 unsigned char device_g = *src++; 763 unsigned char device_b = *src++; 764 float out_device_r, out_device_g, out_device_b; 765 766 float linear_r = transform->input_gamma_table_r[device_r]; 767 float linear_g = transform->input_gamma_table_g[device_g]; 768 float linear_b = transform->input_gamma_table_b[device_b]; 769 770 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; 771 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; 772 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; 773 774 out_linear_r = clamp_float(out_linear_r); 775 out_linear_g = clamp_float(out_linear_g); 776 out_linear_b = clamp_float(out_linear_b); 777 778 out_device_r = lut_interp_linear(out_linear_r, 779 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); 780 out_device_g = lut_interp_linear(out_linear_g, 781 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); 782 out_device_b = lut_interp_linear(out_linear_b, 783 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); 784 785 dest[r_out] = clamp_u8(out_device_r*255); 786 dest[1] = clamp_u8(out_device_g*255); 787 dest[b_out] = clamp_u8(out_device_b*255); 788 dest += 3; 789 } 790 } 791 792 static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 793 { 794 const int r_out = output_format.r; 795 const int b_out = output_format.b; 796 797 unsigned int i; 798 float (*mat)[4] = transform->matrix; 799 for (i = 0; i < length; i++) { 800 unsigned char device_r = *src++; 801 unsigned char device_g = *src++; 802 unsigned char device_b = *src++; 803 unsigned char alpha = *src++; 804 float out_device_r, out_device_g, out_device_b; 805 806 float linear_r = transform->input_gamma_table_r[device_r]; 807 float linear_g = transform->input_gamma_table_g[device_g]; 808 float linear_b = transform->input_gamma_table_b[device_b]; 809 810 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; 811 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; 812 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; 813 814 out_linear_r = clamp_float(out_linear_r); 815 out_linear_g = clamp_float(out_linear_g); 816 out_linear_b = clamp_float(out_linear_b); 817 818 out_device_r = lut_interp_linear(out_linear_r, 819 transform->output_gamma_lut_r, transform->output_gamma_lut_r_length); 820 out_device_g = lut_interp_linear(out_linear_g, 821 transform->output_gamma_lut_g, transform->output_gamma_lut_g_length); 822 out_device_b = lut_interp_linear(out_linear_b, 823 transform->output_gamma_lut_b, transform->output_gamma_lut_b_length); 824 825 dest[r_out] = clamp_u8(out_device_r*255); 826 dest[1] = clamp_u8(out_device_g*255); 827 dest[b_out] = clamp_u8(out_device_b*255); 828 dest[3] = alpha; 829 dest += 4; 830 } 831 } 832 833 #if 0 834 static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format) 835 { 836 const int r_out = output_format.r; 837 const int b_out = output_format.b; 838 839 int i; 840 float (*mat)[4] = transform->matrix; 841 for (i = 0; i < length; i++) { 842 unsigned char device_r = *src++; 843 unsigned char device_g = *src++; 844 unsigned char device_b = *src++; 845 846 float linear_r = transform->input_gamma_table_r[device_r]; 847 float linear_g = transform->input_gamma_table_g[device_g]; 848 float linear_b = transform->input_gamma_table_b[device_b]; 849 850 float out_linear_r = mat[0][0]*linear_r + mat[1][0]*linear_g + mat[2][0]*linear_b; 851 float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b; 852 float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b; 853 854 dest[r_out] = clamp_u8(out_linear_r*255); 855 dest[1] = clamp_u8(out_linear_g*255); 856 dest[b_out] = clamp_u8(out_linear_b*255); 857 dest += 3; 858 } 859 } 860 #endif 861 862 static struct precache_output *precache_reference(struct precache_output *p) 863 { 864 p->ref_count++; 865 return p; 866 } 867 868 static struct precache_output *precache_create() 869 { 870 struct precache_output *p = malloc(sizeof(struct precache_output)); 871 if (p) 872 p->ref_count = 1; 873 return p; 874 } 875 876 void precache_release(struct precache_output *p) 877 { 878 if (--p->ref_count == 0) { 879 free(p); 880 } 881 } 882 883 #ifdef HAVE_POSIX_MEMALIGN 884 static qcms_transform *transform_alloc(void) 885 { 886 qcms_transform *t; 887 if (!posix_memalign(&t, 16, sizeof(*t))) { 888 return t; 889 } else { 890 return NULL; 891 } 892 } 893 static void transform_free(qcms_transform *t) 894 { 895 free(t); 896 } 897 #else 898 static qcms_transform *transform_alloc(void) 899 { 900 /* transform needs to be aligned on a 16byte boundrary */ 901 char *original_block = calloc(sizeof(qcms_transform) + sizeof(void*) + 16, 1); 902 /* make room for a pointer to the block returned by calloc */ 903 void *transform_start = original_block + sizeof(void*); 904 /* align transform_start */ 905 qcms_transform *transform_aligned = (qcms_transform*)(((uintptr_t)transform_start + 15) & ~0xf); 906 907 /* store a pointer to the block returned by calloc so that we can free it later */ 908 void **(original_block_ptr) = (void**)transform_aligned; 909 if (!original_block) 910 return NULL; 911 original_block_ptr--; 912 *original_block_ptr = original_block; 913 914 return transform_aligned; 915 } 916 static void transform_free(qcms_transform *t) 917 { 918 /* get at the pointer to the unaligned block returned by calloc */ 919 void **p = (void**)t; 920 p--; 921 free(*p); 922 } 923 #endif 924 925 void qcms_transform_release(qcms_transform *t) 926 { 927 /* ensure we only free the gamma tables once even if there are 928 * multiple references to the same data */ 929 930 if (t->output_table_r) 931 precache_release(t->output_table_r); 932 if (t->output_table_g) 933 precache_release(t->output_table_g); 934 if (t->output_table_b) 935 precache_release(t->output_table_b); 936 937 free(t->input_gamma_table_r); 938 if (t->input_gamma_table_g != t->input_gamma_table_r) 939 free(t->input_gamma_table_g); 940 if (t->input_gamma_table_g != t->input_gamma_table_r && 941 t->input_gamma_table_g != t->input_gamma_table_b) 942 free(t->input_gamma_table_b); 943 944 free(t->input_gamma_table_gray); 945 946 free(t->output_gamma_lut_r); 947 free(t->output_gamma_lut_g); 948 free(t->output_gamma_lut_b); 949 950 transform_free(t); 951 } 952 953 #ifdef X86 954 // Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in 955 // mozilla/jpeg) 956 // ------------------------------------------------------------------------- 957 #if defined(_M_IX86) && defined(_MSC_VER) 958 #define HAS_CPUID 959 /* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC 960 register - I'm not sure if that ever happens on windows, but cpuid isn't 961 on the critical path so we just preserve the register to be safe and to be 962 consistent with the non-windows version. */ 963 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) { 964 uint32_t a_, b_, c_, d_; 965 __asm { 966 xchg ebx, esi 967 mov eax, fxn 968 cpuid 969 mov a_, eax 970 mov b_, ebx 971 mov c_, ecx 972 mov d_, edx 973 xchg ebx, esi 974 } 975 *a = a_; 976 *b = b_; 977 *c = c_; 978 *d = d_; 979 } 980 #elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386)) 981 #define HAS_CPUID 982 /* Get us a CPUID function. We can't use ebx because it's the PIC register on 983 some platforms, so we use ESI instead and save ebx to avoid clobbering it. */ 984 static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) { 985 986 uint32_t a_, b_, c_, d_; 987 __asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;" 988 : "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn)); 989 *a = a_; 990 *b = b_; 991 *c = c_; 992 *d = d_; 993 } 994 #endif 995 996 // -------------------------Runtime SSEx Detection----------------------------- 997 998 /* MMX is always supported per 999 * Gecko v1.9.1 minimum CPU requirements */ 1000 #define SSE1_EDX_MASK (1UL << 25) 1001 #define SSE2_EDX_MASK (1UL << 26) 1002 #define SSE3_ECX_MASK (1UL << 0) 1003 1004 static int sse_version_available(void) 1005 { 1006 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) 1007 /* we know at build time that 64-bit CPUs always have SSE2 1008 * this tells the compiler that non-SSE2 branches will never be 1009 * taken (i.e. OK to optimze away the SSE1 and non-SIMD code */ 1010 return 2; 1011 #elif defined(HAS_CPUID) 1012 static int sse_version = -1; 1013 uint32_t a, b, c, d; 1014 uint32_t function = 0x00000001; 1015 1016 if (sse_version == -1) { 1017 sse_version = 0; 1018 cpuid(function, &a, &b, &c, &d); 1019 if (c & SSE3_ECX_MASK) 1020 sse_version = 3; 1021 else if (d & SSE2_EDX_MASK) 1022 sse_version = 2; 1023 else if (d & SSE1_EDX_MASK) 1024 sse_version = 1; 1025 } 1026 1027 return sse_version; 1028 #else 1029 return 0; 1030 #endif 1031 } 1032 #endif 1033 1034 static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f}, 1035 {-0.7502f, 1.7135f, 0.0367f}, 1036 { 0.0389f,-0.0685f, 1.0296f}}, 1037 false}; 1038 1039 static const struct matrix bradford_matrix_inv = {{ { 0.9869929f,-0.1470543f, 0.1599627f}, 1040 { 0.4323053f, 0.5183603f, 0.0492912f}, 1041 {-0.0085287f, 0.0400428f, 0.9684867f}}, 1042 false}; 1043 1044 // See ICCv4 E.3 1045 struct matrix compute_whitepoint_adaption(float X, float Y, float Z) { 1046 float p = (0.96422f*bradford_matrix.m[0][0] + 1.000f*bradford_matrix.m[1][0] + 0.82521f*bradford_matrix.m[2][0]) / 1047 (X*bradford_matrix.m[0][0] + Y*bradford_matrix.m[1][0] + Z*bradford_matrix.m[2][0] ); 1048 float y = (0.96422f*bradford_matrix.m[0][1] + 1.000f*bradford_matrix.m[1][1] + 0.82521f*bradford_matrix.m[2][1]) / 1049 (X*bradford_matrix.m[0][1] + Y*bradford_matrix.m[1][1] + Z*bradford_matrix.m[2][1] ); 1050 float b = (0.96422f*bradford_matrix.m[0][2] + 1.000f*bradford_matrix.m[1][2] + 0.82521f*bradford_matrix.m[2][2]) / 1051 (X*bradford_matrix.m[0][2] + Y*bradford_matrix.m[1][2] + Z*bradford_matrix.m[2][2] ); 1052 struct matrix white_adaption = {{ {p,0,0}, {0,y,0}, {0,0,b}}, false}; 1053 return matrix_multiply( bradford_matrix_inv, matrix_multiply(white_adaption, bradford_matrix) ); 1054 } 1055 1056 void qcms_profile_precache_output_transform(qcms_profile *profile) 1057 { 1058 /* we only support precaching on rgb profiles */ 1059 if (profile->color_space != RGB_SIGNATURE) 1060 return; 1061 1062 if (qcms_supports_iccv4) { 1063 /* don't precache since we will use the B2A LUT */ 1064 if (profile->B2A0) 1065 return; 1066 1067 /* don't precache since we will use the mBA LUT */ 1068 if (profile->mBA) 1069 return; 1070 } 1071 1072 /* don't precache if we do not have the TRC curves */ 1073 if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC) 1074 return; 1075 1076 if (!profile->output_table_r) { 1077 profile->output_table_r = precache_create(); 1078 if (profile->output_table_r && 1079 !compute_precache(profile->redTRC, profile->output_table_r->data)) { 1080 precache_release(profile->output_table_r); 1081 profile->output_table_r = NULL; 1082 } 1083 } 1084 if (!profile->output_table_g) { 1085 profile->output_table_g = precache_create(); 1086 if (profile->output_table_g && 1087 !compute_precache(profile->greenTRC, profile->output_table_g->data)) { 1088 precache_release(profile->output_table_g); 1089 profile->output_table_g = NULL; 1090 } 1091 } 1092 if (!profile->output_table_b) { 1093 profile->output_table_b = precache_create(); 1094 if (profile->output_table_b && 1095 !compute_precache(profile->blueTRC, profile->output_table_b->data)) { 1096 precache_release(profile->output_table_b); 1097 profile->output_table_b = NULL; 1098 } 1099 } 1100 } 1101 1102 /* Replace the current transformation with a LUT transformation using a given number of sample points */ 1103 qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms_profile *in, qcms_profile *out, 1104 int samples, qcms_data_type in_type) 1105 { 1106 /* The range between which 2 consecutive sample points can be used to interpolate */ 1107 uint16_t x,y,z; 1108 uint32_t l; 1109 uint32_t lutSize = 3 * samples * samples * samples; 1110 float* src = NULL; 1111 float* dest = NULL; 1112 float* lut = NULL; 1113 1114 src = malloc(lutSize*sizeof(float)); 1115 dest = malloc(lutSize*sizeof(float)); 1116 1117 if (src && dest) { 1118 /* Prepare a list of points we want to sample */ 1119 l = 0; 1120 for (x = 0; x < samples; x++) { 1121 for (y = 0; y < samples; y++) { 1122 for (z = 0; z < samples; z++) { 1123 src[l++] = x / (float)(samples-1); 1124 src[l++] = y / (float)(samples-1); 1125 src[l++] = z / (float)(samples-1); 1126 } 1127 } 1128 } 1129 1130 lut = qcms_chain_transform(in, out, src, dest, lutSize); 1131 if (lut) { 1132 transform->r_clut = &lut[0]; 1133 transform->g_clut = &lut[1]; 1134 transform->b_clut = &lut[2]; 1135 transform->grid_size = samples; 1136 if (in_type == QCMS_DATA_RGBA_8) { 1137 transform->transform_fn = qcms_transform_data_tetra_clut_rgba; 1138 } else { 1139 transform->transform_fn = qcms_transform_data_tetra_clut; 1140 } 1141 } 1142 } 1143 1144 1145 //XXX: qcms_modular_transform_data may return either the src or dest buffer. If so it must not be free-ed 1146 if (src && lut != src) { 1147 free(src); 1148 } else if (dest && lut != src) { 1149 free(dest); 1150 } 1151 1152 if (lut == NULL) { 1153 return NULL; 1154 } 1155 return transform; 1156 } 1157 1158 #define NO_MEM_TRANSFORM NULL 1159 1160 qcms_transform* qcms_transform_create( 1161 qcms_profile *in, qcms_data_type in_type, 1162 qcms_profile *out, qcms_data_type out_type, 1163 qcms_intent intent) 1164 { 1165 bool precache = false; 1166 1167 qcms_transform *transform = transform_alloc(); 1168 if (!transform) { 1169 return NULL; 1170 } 1171 if (out_type != QCMS_DATA_RGB_8 && 1172 out_type != QCMS_DATA_RGBA_8) { 1173 assert(0 && "output type"); 1174 transform_free(transform); 1175 return NULL; 1176 } 1177 1178 if (out->output_table_r && 1179 out->output_table_g && 1180 out->output_table_b) { 1181 precache = true; 1182 } 1183 1184 if (qcms_supports_iccv4 && (in->A2B0 || out->B2A0 || in->mAB || out->mAB)) { 1185 // Precache the transformation to a CLUT 33x33x33 in size. 1186 // 33 is used by many profiles and works well in pratice. 1187 // This evenly divides 256 into blocks of 8x8x8. 1188 // TODO For transforming small data sets of about 200x200 or less 1189 // precaching should be avoided. 1190 qcms_transform *result = qcms_transform_precacheLUT_float(transform, in, out, 33, in_type); 1191 if (!result) { 1192 assert(0 && "precacheLUT failed"); 1193 transform_free(transform); 1194 return NULL; 1195 } 1196 return result; 1197 } 1198 1199 if (precache) { 1200 transform->output_table_r = precache_reference(out->output_table_r); 1201 transform->output_table_g = precache_reference(out->output_table_g); 1202 transform->output_table_b = precache_reference(out->output_table_b); 1203 } else { 1204 if (!out->redTRC || !out->greenTRC || !out->blueTRC) { 1205 qcms_transform_release(transform); 1206 return NO_MEM_TRANSFORM; 1207 } 1208 build_output_lut(out->redTRC, &transform->output_gamma_lut_r, &transform->output_gamma_lut_r_length); 1209 build_output_lut(out->greenTRC, &transform->output_gamma_lut_g, &transform->output_gamma_lut_g_length); 1210 build_output_lut(out->blueTRC, &transform->output_gamma_lut_b, &transform->output_gamma_lut_b_length); 1211 if (!transform->output_gamma_lut_r || !transform->output_gamma_lut_g || !transform->output_gamma_lut_b) { 1212 qcms_transform_release(transform); 1213 return NO_MEM_TRANSFORM; 1214 } 1215 } 1216 1217 if (in->color_space == RGB_SIGNATURE) { 1218 struct matrix in_matrix, out_matrix, result; 1219 1220 if (in_type != QCMS_DATA_RGB_8 && 1221 in_type != QCMS_DATA_RGBA_8){ 1222 assert(0 && "input type"); 1223 transform_free(transform); 1224 return NULL; 1225 } 1226 if (precache) { 1227 #if defined(SSE2_ENABLE) && defined(X86) 1228 if (sse_version_available() >= 2) { 1229 if (in_type == QCMS_DATA_RGB_8) 1230 transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2; 1231 else 1232 transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2; 1233 1234 #if defined(SSE2_ENABLE) && !(defined(_MSC_VER) && defined(_M_AMD64)) 1235 /* Microsoft Compiler for x64 doesn't support MMX. 1236 * SSE code uses MMX so that we disable on x64 */ 1237 } else 1238 if (sse_version_available() >= 1) { 1239 if (in_type == QCMS_DATA_RGB_8) 1240 transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1; 1241 else 1242 transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1; 1243 #endif 1244 } else 1245 #endif 1246 { 1247 if (in_type == QCMS_DATA_RGB_8) 1248 transform->transform_fn = qcms_transform_data_rgb_out_lut_precache; 1249 else 1250 transform->transform_fn = qcms_transform_data_rgba_out_lut_precache; 1251 } 1252 } else { 1253 if (in_type == QCMS_DATA_RGB_8) 1254 transform->transform_fn = qcms_transform_data_rgb_out_lut; 1255 else 1256 transform->transform_fn = qcms_transform_data_rgba_out_lut; 1257 } 1258 1259 //XXX: avoid duplicating tables if we can 1260 transform->input_gamma_table_r = build_input_gamma_table(in->redTRC); 1261 transform->input_gamma_table_g = build_input_gamma_table(in->greenTRC); 1262 transform->input_gamma_table_b = build_input_gamma_table(in->blueTRC); 1263 if (!transform->input_gamma_table_r || !transform->input_gamma_table_g || !transform->input_gamma_table_b) { 1264 qcms_transform_release(transform); 1265 return NO_MEM_TRANSFORM; 1266 } 1267 1268 1269 /* build combined colorant matrix */ 1270 in_matrix = build_colorant_matrix(in); 1271 out_matrix = build_colorant_matrix(out); 1272 out_matrix = matrix_invert(out_matrix); 1273 if (out_matrix.invalid) { 1274 qcms_transform_release(transform); 1275 return NULL; 1276 } 1277 result = matrix_multiply(out_matrix, in_matrix); 1278 1279 /* store the results in column major mode 1280 * this makes doing the multiplication with sse easier */ 1281 transform->matrix[0][0] = result.m[0][0]; 1282 transform->matrix[1][0] = result.m[0][1]; 1283 transform->matrix[2][0] = result.m[0][2]; 1284 transform->matrix[0][1] = result.m[1][0]; 1285 transform->matrix[1][1] = result.m[1][1]; 1286 transform->matrix[2][1] = result.m[1][2]; 1287 transform->matrix[0][2] = result.m[2][0]; 1288 transform->matrix[1][2] = result.m[2][1]; 1289 transform->matrix[2][2] = result.m[2][2]; 1290 1291 } else if (in->color_space == GRAY_SIGNATURE) { 1292 if (in_type != QCMS_DATA_GRAY_8 && 1293 in_type != QCMS_DATA_GRAYA_8){ 1294 assert(0 && "input type"); 1295 transform_free(transform); 1296 return NULL; 1297 } 1298 1299 transform->input_gamma_table_gray = build_input_gamma_table(in->grayTRC); 1300 if (!transform->input_gamma_table_gray) { 1301 qcms_transform_release(transform); 1302 return NO_MEM_TRANSFORM; 1303 } 1304 1305 if (precache) { 1306 if (in_type == QCMS_DATA_GRAY_8) { 1307 transform->transform_fn = qcms_transform_data_gray_out_precache; 1308 } else { 1309 transform->transform_fn = qcms_transform_data_graya_out_precache; 1310 } 1311 } else { 1312 if (in_type == QCMS_DATA_GRAY_8) { 1313 transform->transform_fn = qcms_transform_data_gray_out_lut; 1314 } else { 1315 transform->transform_fn = qcms_transform_data_graya_out_lut; 1316 } 1317 } 1318 } else { 1319 assert(0 && "unexpected colorspace"); 1320 transform_free(transform); 1321 return NULL; 1322 } 1323 return transform; 1324 } 1325 1326 /* __force_align_arg_pointer__ is an x86-only attribute, and gcc/clang warns on unused 1327 * attributes. Don't use this on ARM or AMD64. __has_attribute can detect the presence 1328 * of the attribute but is currently only supported by clang */ 1329 #if defined(__has_attribute) 1330 #define HAS_FORCE_ALIGN_ARG_POINTER __has_attribute(__force_align_arg_pointer__) 1331 #elif defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) && !defined(__arm__) && !defined(__mips__) 1332 #define HAS_FORCE_ALIGN_ARG_POINTER 1 1333 #else 1334 #define HAS_FORCE_ALIGN_ARG_POINTER 0 1335 #endif 1336 1337 #if HAS_FORCE_ALIGN_ARG_POINTER 1338 /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */ 1339 __attribute__((__force_align_arg_pointer__)) 1340 #endif 1341 void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length) 1342 { 1343 static const struct _qcms_format_type output_rgbx = { 0, 2 }; 1344 1345 transform->transform_fn(transform, src, dest, length, output_rgbx); 1346 } 1347 1348 void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type) 1349 { 1350 static const struct _qcms_format_type output_rgbx = { 0, 2 }; 1351 static const struct _qcms_format_type output_bgrx = { 2, 0 }; 1352 1353 transform->transform_fn(transform, src, dest, length, type == QCMS_OUTPUT_BGRX ? output_bgrx : output_rgbx); 1354 } 1355 1356 qcms_bool qcms_supports_iccv4; 1357 void qcms_enable_iccv4() 1358 { 1359 qcms_supports_iccv4 = true; 1360 } 1361