Home | History | Annotate | Download | only in qcms
      1 diff --git a/third_party/qcms/src/iccread.c b/third_party/qcms/src/iccread.c
      2 index 36b7011..d3c3dfe 100644
      3 --- a/third_party/qcms/src/iccread.c
      4 +++ b/third_party/qcms/src/iccread.c
      5 @@ -266,7 +266,7 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
      6         if (profile->color_space != RGB_SIGNATURE)
      7  	       return false;
      8  
      9 -       if (profile->A2B0 || profile->B2A0)
     10 +       if (qcms_supports_iccv4 && (profile->A2B0 || profile->B2A0))
     11                 return false;
     12  
     13         rX = s15Fixed16Number_to_float(profile->redColorant.X);
     14 @@ -297,6 +297,11 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
     15         sum[1] = rY + gY + bY;
     16         sum[2] = rZ + gZ + bZ;
     17  
     18 +#if defined (_MSC_VER)
     19 +#pragma warning(push)
     20 +/* Disable double to float truncation warning 4305 */
     21 +#pragma warning(disable:4305)
     22 +#endif
     23         // Build our target vector (see mozilla bug 460629)
     24         target[0] = 0.96420;
     25         target[1] = 1.00000;
     26 @@ -310,6 +315,10 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
     27         tolerance[1] = 0.02;
     28         tolerance[2] = 0.04;
     29  
     30 +#if defined (_MSC_VER)
     31 +/* Restore warnings */
     32 +#pragma warning(pop)
     33 +#endif
     34         // Compare with our tolerance
     35         for (i = 0; i < 3; ++i) {
     36             if (!(((sum[i] - tolerance[i]) <= target[i]) &&
     37 @@ -402,7 +411,7 @@ static struct XYZNumber read_tag_XYZType(struct mem_source *src, struct tag_inde
     38  // present that are not part of the tag_index.
     39  static struct curveType *read_curveType(struct mem_source *src, uint32_t offset, uint32_t *len)
     40  {
     41 -	static const size_t COUNT_TO_LENGTH[5] = {1, 3, 4, 5, 7};
     42 +	static const uint32_t COUNT_TO_LENGTH[5] = {1, 3, 4, 5, 7};
     43  	struct curveType *curve = NULL;
     44  	uint32_t type = read_u32(src, offset);
     45  	uint32_t count;
     46 @@ -657,7 +666,7 @@ static struct lutType *read_tag_lutType(struct mem_source *src, struct tag_index
     47  	uint16_t num_input_table_entries;
     48  	uint16_t num_output_table_entries;
     49  	uint8_t in_chan, grid_points, out_chan;
     50 -	uint32_t clut_offset, output_offset;
     51 +	size_t clut_offset, output_offset;
     52  	uint32_t clut_size;
     53  	size_t entry_size;
     54  	struct lutType *lut;
     55 diff --git a/third_party/qcms/src/qcms.h b/third_party/qcms/src/qcms.h
     56 index 7d83623..11fe222 100644
     57 --- a/third_party/qcms/src/qcms.h
     58 +++ b/third_party/qcms/src/qcms.h
     59 @@ -40,6 +40,12 @@ sale, use or other dealings in this Software without written
     60  authorization from SunSoft Inc. 
     61  ******************************************************************/
     62  
     63 +/*
     64 + * QCMS, in general, is not threadsafe. However, it should be safe to create
     65 + * profile and transformation objects on different threads, so long as you
     66 + * don't use the same objects on different threads at the same time.
     67 + */
     68 +
     69  /* 
     70   * Color Space Signatures
     71   * Note that only icSigXYZData and icSigLabData are valid
     72 @@ -102,6 +108,12 @@ typedef enum {
     73  	QCMS_DATA_GRAYA_8
     74  } qcms_data_type;
     75  
     76 +/* Format of the output data for qcms_transform_data_type() */
     77 +typedef enum {
     78 +	QCMS_OUTPUT_RGBX,
     79 +	QCMS_OUTPUT_BGRX
     80 +} qcms_output_type;
     81 +
     82  /* the names for the following two types are sort of ugly */
     83  typedef struct
     84  {
     85 @@ -146,6 +158,7 @@ qcms_transform* qcms_transform_create(
     86  void qcms_transform_release(qcms_transform *);
     87  
     88  void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length);
     89 +void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type);
     90  
     91  void qcms_enable_iccv4();
     92  
     93 diff --git a/third_party/qcms/src/qcmsint.h b/third_party/qcms/src/qcmsint.h
     94 index 53a3420..af20948 100644
     95 --- a/third_party/qcms/src/qcmsint.h
     96 +++ b/third_party/qcms/src/qcmsint.h
     97 @@ -45,6 +45,11 @@ struct precache_output
     98  #define ALIGN __attribute__(( aligned (16) ))
     99  #endif
    100  
    101 +typedef struct _qcms_format_type { /* Output channel layout handed to every transform_fn. */
    102 +	int r; /* Index within one destination pixel where the red byte is stored. */
    103 +	int b; /* Index within one destination pixel where the blue byte is stored. */
    104 +} qcms_format_type; /* Green is always written at index 1, so it has no field here. */
    105 +
    106  struct _qcms_transform {
    107  	float ALIGN matrix[3][4];
    108  	float *input_gamma_table_r;
    109 @@ -88,7 +93,7 @@ struct _qcms_transform {
    110  	struct precache_output *output_table_g;
    111  	struct precache_output *output_table_b;
    112  
    113 -	void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length);
    114 +	void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, struct _qcms_format_type output_format);
    115  };
    116  
    117  struct matrix {
    118 @@ -280,18 +285,40 @@ qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm
    119  void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    120                                            unsigned char *src,
    121                                            unsigned char *dest,
    122 -                                          size_t length);
    123 +                                          size_t length,
    124 +                                          qcms_format_type output_format);
    125  void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    126                                            unsigned char *src,
    127                                            unsigned char *dest,
    128 -                                          size_t length);
    129 +                                          size_t length,
    130 +                                          qcms_format_type output_format);
    131  void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    132                                            unsigned char *src,
    133                                            unsigned char *dest,
    134 -                                          size_t length);
    135 +                                          size_t length,
    136 +                                          qcms_format_type output_format);
    137  void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    138                                            unsigned char *src,
    139                                            unsigned char *dest,
    140 -                                          size_t length);
    141 +                                          size_t length,
    142 +                                          qcms_format_type output_format);
    143  
    144  extern qcms_bool qcms_supports_iccv4;
    145 +
    146 +
    147 +#ifdef _MSC_VER
    148 +/* MSVC: declare the interlocked intrinsics by hand and request their intrinsic form. */
    149 +long __cdecl _InterlockedIncrement(long volatile *);
    150 +long __cdecl _InterlockedDecrement(long volatile *);
    151 +#pragma intrinsic(_InterlockedIncrement)
    152 +#pragma intrinsic(_InterlockedDecrement)
    153 +/* NOTE(review): the cast assumes x is a 32-bit integer (long on MSVC) - confirm at call sites. */
    154 +#define qcms_atomic_increment(x) _InterlockedIncrement((long volatile *)&(x))
    155 +#define qcms_atomic_decrement(x) _InterlockedDecrement((long volatile *)&(x))
    156 +/* The macro argument is parenthesized so any lvalue expression expands safely. */
    157 +#else
    158 +/* GCC-style __sync builtins; like the MSVC intrinsics they yield the updated value. */
    159 +#define qcms_atomic_increment(x) __sync_add_and_fetch(&(x), 1)
    160 +#define qcms_atomic_decrement(x) __sync_sub_and_fetch(&(x), 1)
    161 +
    162 +#endif
    163 diff --git a/third_party/qcms/src/qcmstypes.h b/third_party/qcms/src/qcmstypes.h
    164 index 56d8de3..9a9b197 100644
    165 --- a/third_party/qcms/src/qcmstypes.h
    166 +++ b/third_party/qcms/src/qcmstypes.h
    167 @@ -87,7 +87,12 @@ typedef unsigned __int64 uint64_t;
    168  #ifdef _WIN64
    169  typedef unsigned __int64 uintptr_t;
    170  #else
    171 +#pragma warning(push)
    172 +/* Disable benign redefinition of type warning 4142 */
    173 +#pragma warning(disable:4142)
    174  typedef unsigned long uintptr_t;
    175 +/* Restore warnings */
    176 +#pragma warning(pop)
    177  #endif
    178  
    179  #elif defined (_AIX)
    180 diff --git a/third_party/qcms/src/transform-sse1.c b/third_party/qcms/src/transform-sse1.c
    181 index 2f34db5..aaee1bf 100644
    182 --- a/third_party/qcms/src/transform-sse1.c
    183 +++ b/third_party/qcms/src/transform-sse1.c
    184 @@ -34,7 +34,8 @@ static const ALIGN float clampMaxValueX4[4] =
    185  void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    186                                            unsigned char *src,
    187                                            unsigned char *dest,
    188 -                                          size_t length)
    189 +                                          size_t length,
    190 +                                          qcms_format_type output_format)
    191  {
    192      unsigned int i;
    193      float (*mat)[4] = transform->matrix;
    194 @@ -70,6 +71,8 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    195  
    196      /* working variables */
    197      __m128 vec_r, vec_g, vec_b, result;
    198 +    const int r_out = output_format.r;
    199 +    const int b_out = output_format.b;
    200  
    201      /* CYA */
    202      if (!length)
    203 @@ -116,9 +119,9 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    204          src += 3;
    205  
    206          /* use calc'd indices to output RGB values */
    207 -        dest[0] = otdata_r[output[0]];
    208 -        dest[1] = otdata_g[output[1]];
    209 -        dest[2] = otdata_b[output[2]];
    210 +        dest[r_out] = otdata_r[output[0]];
    211 +        dest[1]     = otdata_g[output[1]];
    212 +        dest[b_out] = otdata_b[output[2]];
    213          dest += 3;
    214      }
    215  
    216 @@ -141,9 +144,9 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    217      result = _mm_movehl_ps(result, result);
    218      *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
    219  
    220 -    dest[0] = otdata_r[output[0]];
    221 -    dest[1] = otdata_g[output[1]];
    222 -    dest[2] = otdata_b[output[2]];
    223 +    dest[r_out] = otdata_r[output[0]];
    224 +    dest[1]     = otdata_g[output[1]];
    225 +    dest[b_out] = otdata_b[output[2]];
    226  
    227      _mm_empty();
    228  }
    229 @@ -151,7 +154,8 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    230  void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    231                                             unsigned char *src,
    232                                             unsigned char *dest,
    233 -                                           size_t length)
    234 +                                           size_t length,
    235 +                                           qcms_format_type output_format)
    236  {
    237      unsigned int i;
    238      float (*mat)[4] = transform->matrix;
    239 @@ -187,6 +191,8 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    240  
    241      /* working variables */
    242      __m128 vec_r, vec_g, vec_b, result;
    243 +    const int r_out = output_format.r;
    244 +    const int b_out = output_format.b;
    245      unsigned char alpha;
    246  
    247      /* CYA */
    248 @@ -239,9 +245,9 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    249          src += 4;
    250  
    251          /* use calc'd indices to output RGB values */
    252 -        dest[0] = otdata_r[output[0]];
    253 -        dest[1] = otdata_g[output[1]];
    254 -        dest[2] = otdata_b[output[2]];
    255 +        dest[r_out] = otdata_r[output[0]];
    256 +        dest[1]     = otdata_g[output[1]];
    257 +        dest[b_out] = otdata_b[output[2]];
    258          dest += 4;
    259      }
    260  
    261 @@ -266,9 +272,9 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    262      result = _mm_movehl_ps(result, result);
    263      *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
    264  
    265 -    dest[0] = otdata_r[output[0]];
    266 -    dest[1] = otdata_g[output[1]];
    267 -    dest[2] = otdata_b[output[2]];
    268 +    dest[r_out] = otdata_r[output[0]];
    269 +    dest[1]     = otdata_g[output[1]];
    270 +    dest[b_out] = otdata_b[output[2]];
    271  
    272      _mm_empty();
    273  }
    274 diff --git a/third_party/qcms/src/transform-sse2.c b/third_party/qcms/src/transform-sse2.c
    275 index 6a5faf9..fa7f2d1 100644
    276 --- a/third_party/qcms/src/transform-sse2.c
    277 +++ b/third_party/qcms/src/transform-sse2.c
    278 @@ -34,7 +34,8 @@ static const ALIGN float clampMaxValueX4[4] =
    279  void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    280                                            unsigned char *src,
    281                                            unsigned char *dest,
    282 -                                          size_t length)
    283 +                                          size_t length,
    284 +                                          qcms_format_type output_format)
    285  {
    286      unsigned int i;
    287      float (*mat)[4] = transform->matrix;
    288 @@ -70,6 +71,8 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    289  
    290      /* working variables */
    291      __m128 vec_r, vec_g, vec_b, result;
    292 +    const int r_out = output_format.r;
    293 +    const int b_out = output_format.b;
    294  
    295      /* CYA */
    296      if (!length)
    297 @@ -114,9 +117,9 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    298          src += 3;
    299  
    300          /* use calc'd indices to output RGB values */
    301 -        dest[0] = otdata_r[output[0]];
    302 -        dest[1] = otdata_g[output[1]];
    303 -        dest[2] = otdata_b[output[2]];
    304 +        dest[r_out] = otdata_r[output[0]];
    305 +        dest[1]     = otdata_g[output[1]];
    306 +        dest[b_out] = otdata_b[output[2]];
    307          dest += 3;
    308      }
    309  
    310 @@ -137,15 +140,16 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    311  
    312      _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
    313  
    314 -    dest[0] = otdata_r[output[0]];
    315 -    dest[1] = otdata_g[output[1]];
    316 -    dest[2] = otdata_b[output[2]];
    317 +    dest[r_out] = otdata_r[output[0]];
    318 +    dest[1]     = otdata_g[output[1]];
    319 +    dest[b_out] = otdata_b[output[2]];
    320  }
    321  
    322  void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    323                                             unsigned char *src,
    324                                             unsigned char *dest,
    325 -                                           size_t length)
    326 +                                           size_t length,
    327 +                                           qcms_format_type output_format)
    328  {
    329      unsigned int i;
    330      float (*mat)[4] = transform->matrix;
    331 @@ -181,6 +185,8 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    332  
    333      /* working variables */
    334      __m128 vec_r, vec_g, vec_b, result;
    335 +    const int r_out = output_format.r;
    336 +    const int b_out = output_format.b;
    337      unsigned char alpha;
    338  
    339      /* CYA */
    340 @@ -231,9 +237,9 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    341          src += 4;
    342  
    343          /* use calc'd indices to output RGB values */
    344 -        dest[0] = otdata_r[output[0]];
    345 -        dest[1] = otdata_g[output[1]];
    346 -        dest[2] = otdata_b[output[2]];
    347 +        dest[r_out] = otdata_r[output[0]];
    348 +        dest[1]     = otdata_g[output[1]];
    349 +        dest[b_out] = otdata_b[output[2]];
    350          dest += 4;
    351      }
    352  
    353 @@ -256,7 +262,7 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    354  
    355      _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
    356  
    357 -    dest[0] = otdata_r[output[0]];
    358 -    dest[1] = otdata_g[output[1]];
    359 -    dest[2] = otdata_b[output[2]];
    360 +    dest[r_out] = otdata_r[output[0]];
    361 +    dest[1]     = otdata_g[output[1]];
    362 +    dest[b_out] = otdata_b[output[2]];
    363  }
    364 diff --git a/third_party/qcms/src/transform.c b/third_party/qcms/src/transform.c
    365 index 9a6562b..7e0ba2c 100644
    366 --- a/third_party/qcms/src/transform.c
    367 +++ b/third_party/qcms/src/transform.c
    368 @@ -181,11 +181,20 @@ compute_chromatic_adaption(struct CIE_XYZ source_white_point,
    369  static struct matrix
    370  adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
    371  {
    372 +#if defined (_MSC_VER)
    373 +#pragma warning(push)
    374 +/* Disable double to float truncation warning 4305 */
    375 +#pragma warning(disable:4305)
    376 +#endif
    377  	struct matrix lam_rigg = {{ // Bradford matrix
    378  	                         {  0.8951,  0.2664, -0.1614 },
    379  	                         { -0.7502,  1.7135,  0.0367 },
    380  	                         {  0.0389, -0.0685,  1.0296 }
    381  	                         }};
    382 +#if defined (_MSC_VER)
    383 +/* Restore warnings */
    384 +#pragma warning(pop)
    385 +#endif
    386  	return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
    387  }
    388  
    389 @@ -230,8 +239,11 @@ qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm
    390  }
    391  
    392  #if 0
    393 -static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    394 +static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    395  {
    396 +	const int r_out = output_format.r;
    397 +	const int b_out = output_format.b;
    398 +
    399  	int i;
    400  	float (*mat)[4] = transform->matrix;
    401  	for (i=0; i<length; i++) {
    402 @@ -251,15 +263,19 @@ static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned
    403  		float out_device_g = pow(out_linear_g, transform->out_gamma_g);
    404  		float out_device_b = pow(out_linear_b, transform->out_gamma_b);
    405  
    406 -		*dest++ = clamp_u8(255*out_device_r);
    407 -		*dest++ = clamp_u8(255*out_device_g);
    408 -		*dest++ = clamp_u8(255*out_device_b);
    409 +		dest[r_out] = clamp_u8(out_device_r*255);
    410 +		dest[1]     = clamp_u8(out_device_g*255);
    411 +		dest[b_out] = clamp_u8(out_device_b*255);
    412 +		dest += 3;
    413  	}
    414  }
    415  #endif
    416  
    417 -static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    418 +static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    419  {
    420 +	const int r_out = output_format.r;
    421 +	const int b_out = output_format.b;
    422 +
    423  	unsigned int i;
    424  	for (i = 0; i < length; i++) {
    425  		float out_device_r, out_device_g, out_device_b;
    426 @@ -267,13 +283,14 @@ static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned
    427  
    428  		float linear = transform->input_gamma_table_gray[device];
    429  
    430 -                out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
    431 +		out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
    432  		out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
    433  		out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
    434  
    435 -		*dest++ = clamp_u8(out_device_r*255);
    436 -		*dest++ = clamp_u8(out_device_g*255);
    437 -		*dest++ = clamp_u8(out_device_b*255);
    438 +		dest[r_out] = clamp_u8(out_device_r*255);
    439 +		dest[1]     = clamp_u8(out_device_g*255);
    440 +		dest[b_out] = clamp_u8(out_device_b*255);
    441 +		dest += 3;
    442  	}
    443  }
    444  
    445 @@ -283,8 +300,11 @@ static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned
    446  	See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
    447  */
    448  
    449 -static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    450 +static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    451  {
    452 +	const int r_out = output_format.r;
    453 +	const int b_out = output_format.b;
    454 +
    455  	unsigned int i;
    456  	for (i = 0; i < length; i++) {
    457  		float out_device_r, out_device_g, out_device_b;
    458 @@ -293,20 +313,24 @@ static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigne
    459  
    460  		float linear = transform->input_gamma_table_gray[device];
    461  
    462 -                out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
    463 +		out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
    464  		out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
    465  		out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
    466  
    467 -		*dest++ = clamp_u8(out_device_r*255);
    468 -		*dest++ = clamp_u8(out_device_g*255);
    469 -		*dest++ = clamp_u8(out_device_b*255);
    470 -		*dest++ = alpha;
    471 +		dest[r_out] = clamp_u8(out_device_r*255);
    472 +		dest[1]     = clamp_u8(out_device_g*255);
    473 +		dest[b_out] = clamp_u8(out_device_b*255);
    474 +		dest[3]     = alpha;
    475 +		dest += 4;
    476  	}
    477  }
    478  
    479  
    480 -static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    481 +static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    482  {
    483 +	const int r_out = output_format.r;
    484 +	const int b_out = output_format.b;
    485 +
    486  	unsigned int i;
    487  	for (i = 0; i < length; i++) {
    488  		unsigned char device = *src++;
    489 @@ -317,14 +341,19 @@ static void qcms_transform_data_gray_out_precache(qcms_transform *transform, uns
    490  		/* we could round here... */
    491  		gray = linear * PRECACHE_OUTPUT_MAX;
    492  
    493 -		*dest++ = transform->output_table_r->data[gray];
    494 -		*dest++ = transform->output_table_g->data[gray];
    495 -		*dest++ = transform->output_table_b->data[gray];
    496 +		dest[r_out] = transform->output_table_r->data[gray];
    497 +		dest[1]     = transform->output_table_g->data[gray];
    498 +		dest[b_out] = transform->output_table_b->data[gray];
    499 +		dest += 3;
    500  	}
    501  }
    502  
    503 -static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    504 +
    505 +static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    506  {
    507 +	const int r_out = output_format.r;
    508 +	const int b_out = output_format.b;
    509 +
    510  	unsigned int i;
    511  	for (i = 0; i < length; i++) {
    512  		unsigned char device = *src++;
    513 @@ -336,15 +365,19 @@ static void qcms_transform_data_graya_out_precache(qcms_transform *transform, un
    514  		/* we could round here... */
    515  		gray = linear * PRECACHE_OUTPUT_MAX;
    516  
    517 -		*dest++ = transform->output_table_r->data[gray];
    518 -		*dest++ = transform->output_table_g->data[gray];
    519 -		*dest++ = transform->output_table_b->data[gray];
    520 -		*dest++ = alpha;
    521 +		dest[r_out] = transform->output_table_r->data[gray];
    522 +		dest[1]     = transform->output_table_g->data[gray];
    523 +		dest[b_out] = transform->output_table_b->data[gray];
    524 +		dest[3]     = alpha;
    525 +		dest += 4;
    526  	}
    527  }
    528  
    529 -static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    530 +static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    531  {
    532 +	const int r_out = output_format.r;
    533 +	const int b_out = output_format.b;
    534 +
    535  	unsigned int i;
    536  	float (*mat)[4] = transform->matrix;
    537  	for (i = 0; i < length; i++) {
    538 @@ -370,14 +403,18 @@ static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform,
    539  		g = out_linear_g * PRECACHE_OUTPUT_MAX;
    540  		b = out_linear_b * PRECACHE_OUTPUT_MAX;
    541  
    542 -		*dest++ = transform->output_table_r->data[r];
    543 -		*dest++ = transform->output_table_g->data[g];
    544 -		*dest++ = transform->output_table_b->data[b];
    545 +		dest[r_out] = transform->output_table_r->data[r];
    546 +		dest[1]     = transform->output_table_g->data[g];
    547 +		dest[b_out] = transform->output_table_b->data[b];
    548 +		dest += 3;
    549  	}
    550  }
    551  
    552 -static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    553 +static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    554  {
    555 +	const int r_out = output_format.r;
    556 +	const int b_out = output_format.b;
    557 +
    558  	unsigned int i;
    559  	float (*mat)[4] = transform->matrix;
    560  	for (i = 0; i < length; i++) {
    561 @@ -404,16 +441,21 @@ static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform,
    562  		g = out_linear_g * PRECACHE_OUTPUT_MAX;
    563  		b = out_linear_b * PRECACHE_OUTPUT_MAX;
    564  
    565 -		*dest++ = transform->output_table_r->data[r];
    566 -		*dest++ = transform->output_table_g->data[g];
    567 -		*dest++ = transform->output_table_b->data[b];
    568 -		*dest++ = alpha;
    569 +		dest[r_out] = transform->output_table_r->data[r];
    570 +		dest[1]     = transform->output_table_g->data[g];
    571 +		dest[b_out] = transform->output_table_b->data[b];
    572 +		dest[3]     = alpha;
    573 +		dest += 4;
    574  	}
    575  }
    576  
    577  // Not used
    578  /* 
    579 -static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
    580 +static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    581 +{
    582 +	const int r_out = output_format.r;
    583 +	const int b_out = output_format.b;
    584 +
    585  	unsigned int i;
    586  	int xy_len = 1;
    587  	int x_len = transform->grid_size;
    588 @@ -462,15 +504,20 @@ static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *s
    589  		float b_y2 = lerp(b_x3, b_x4, y_d);
    590  		float clut_b = lerp(b_y1, b_y2, z_d);
    591  
    592 -		*dest++ = clamp_u8(clut_r*255.0f);
    593 -		*dest++ = clamp_u8(clut_g*255.0f);
    594 -		*dest++ = clamp_u8(clut_b*255.0f);
    595 -	}	
    596 +		dest[r_out] = clamp_u8(clut_r*255.0f);
    597 +		dest[1]     = clamp_u8(clut_g*255.0f);
    598 +		dest[b_out] = clamp_u8(clut_b*255.0f);
    599 +		dest += 3;
    600 +	}
    601  }
    602  */
    603  
    604  // Using lcms' tetra interpolation algorithm.
    605 -static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
    606 +static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    607 +{
    608 +	const int r_out = output_format.r;
    609 +	const int b_out = output_format.b;
    610 +
    611  	unsigned int i;
    612  	int xy_len = 1;
    613  	int x_len = transform->grid_size;
    614 @@ -577,15 +624,20 @@ static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsig
    615  		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
    616  		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
    617  
    618 -		*dest++ = clamp_u8(clut_r*255.0f);
    619 -		*dest++ = clamp_u8(clut_g*255.0f);
    620 -		*dest++ = clamp_u8(clut_b*255.0f);
    621 -		*dest++ = in_a;
    622 -	}	
    623 +		dest[r_out] = clamp_u8(clut_r*255.0f);
    624 +		dest[1]     = clamp_u8(clut_g*255.0f);
    625 +		dest[b_out] = clamp_u8(clut_b*255.0f);
    626 +		dest[3]     = in_a;
    627 +		dest += 4;
    628 +	}
    629  }
    630  
    631  // Using lcms' tetra interpolation code.
    632 -static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
    633 +static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    634 +{
    635 +	const int r_out = output_format.r;
    636 +	const int b_out = output_format.b;
    637 +
    638  	unsigned int i;
    639  	int xy_len = 1;
    640  	int x_len = transform->grid_size;
    641 @@ -691,14 +743,18 @@ static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned c
    642  		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
    643  		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
    644  
    645 -		*dest++ = clamp_u8(clut_r*255.0f);
    646 -		*dest++ = clamp_u8(clut_g*255.0f);
    647 -		*dest++ = clamp_u8(clut_b*255.0f);
    648 -	}	
    649 +		dest[r_out] = clamp_u8(clut_r*255.0f);
    650 +		dest[1]     = clamp_u8(clut_g*255.0f);
    651 +		dest[b_out] = clamp_u8(clut_b*255.0f);
    652 +		dest += 3;
    653 +	}
    654  }
    655  
    656 -static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    657 +static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    658  {
    659 +	const int r_out = output_format.r;
    660 +	const int b_out = output_format.b;
    661 +
    662  	unsigned int i;
    663  	float (*mat)[4] = transform->matrix;
    664  	for (i = 0; i < length; i++) {
    665 @@ -726,14 +782,18 @@ static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned
    666  		out_device_b = lut_interp_linear(out_linear_b, 
    667  				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
    668  
    669 -		*dest++ = clamp_u8(out_device_r*255);
    670 -		*dest++ = clamp_u8(out_device_g*255);
    671 -		*dest++ = clamp_u8(out_device_b*255);
    672 +		dest[r_out] = clamp_u8(out_device_r*255);
    673 +		dest[1]     = clamp_u8(out_device_g*255);
    674 +		dest[b_out] = clamp_u8(out_device_b*255);
    675 +		dest += 3;
    676  	}
    677  }
    678  
    679 -static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    680 +static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    681  {
    682 +	const int r_out = output_format.r;
    683 +	const int b_out = output_format.b;
    684 +
    685  	unsigned int i;
    686  	float (*mat)[4] = transform->matrix;
    687  	for (i = 0; i < length; i++) {
    688 @@ -762,16 +822,20 @@ static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned
    689  		out_device_b = lut_interp_linear(out_linear_b, 
    690  				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
    691  
    692 -		*dest++ = clamp_u8(out_device_r*255);
    693 -		*dest++ = clamp_u8(out_device_g*255);
    694 -		*dest++ = clamp_u8(out_device_b*255);
    695 -		*dest++ = alpha;
    696 +		dest[r_out] = clamp_u8(out_device_r*255);
    697 +		dest[1]     = clamp_u8(out_device_g*255);
    698 +		dest[b_out] = clamp_u8(out_device_b*255);
    699 +		dest[3]     = alpha;
    700 +		dest += 4;
    701  	}
    702  }
    703  
    704  #if 0
    705 -static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    706 +static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    707  {
    708 +	const int r_out = output_format.r;
    709 +	const int b_out = output_format.b;
    710 +
    711  	int i;
    712  	float (*mat)[4] = transform->matrix;
    713  	for (i = 0; i < length; i++) {
    714 @@ -787,16 +851,25 @@ static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsign
    715  		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
    716  		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
    717  
    718 -		*dest++ = clamp_u8(out_linear_r*255);
    719 -		*dest++ = clamp_u8(out_linear_g*255);
    720 -		*dest++ = clamp_u8(out_linear_b*255);
    721 +		dest[r_out] = clamp_u8(out_linear_r*255);
    722 +		dest[1]     = clamp_u8(out_linear_g*255);
    723 +		dest[b_out] = clamp_u8(out_linear_b*255);
    724 +		dest += 3;
    725  	}
    726  }
    727  #endif
    728  
    729 +/*
    730 + * If users create and destroy objects on different threads, even if the same
    731 + * objects aren't used on different threads at the same time, we can still run
    732 + * into trouble with refcounts if they aren't atomic.
    733 + *
    734 + * This can lead to us prematurely deleting the precache if threads get unlucky
    735 + * and write the wrong value to the ref count.
    736 + */
    737  static struct precache_output *precache_reference(struct precache_output *p)
    738  {
    739 -	p->ref_count++;
    740 +	qcms_atomic_increment(p->ref_count);
    741  	return p;
    742  }
    743  
    744 @@ -810,12 +883,12 @@ static struct precache_output *precache_create()
    745  
    746  void precache_release(struct precache_output *p)
    747  {
    748 -	if (--p->ref_count == 0) {
    749 +	if (qcms_atomic_decrement(p->ref_count) == 0) {
    750  		free(p);
    751  	}
    752  }
    753  
    754 -#ifdef HAS_POSIX_MEMALIGN
    755 +#ifdef HAVE_POSIX_MEMALIGN
    756  static qcms_transform *transform_alloc(void)
    757  {
    758  	qcms_transform *t;
    759 @@ -994,13 +1067,15 @@ void qcms_profile_precache_output_transform(qcms_profile *profile)
    760  	if (profile->color_space != RGB_SIGNATURE)
    761  		return;
    762  
    763 -	/* don't precache since we will use the B2A LUT */
    764 -	if (profile->B2A0)
    765 -		return;
    766 +	if (qcms_supports_iccv4) {
    767 +		/* don't precache since we will use the B2A LUT */
    768 +		if (profile->B2A0)
    769 +			return;
    770  
    771 -	/* don't precache since we will use the mBA LUT */
    772 -	if (profile->mBA)
    773 -		return;
    774 +		/* don't precache since we will use the mBA LUT */
    775 +		if (profile->mBA)
    776 +			return;
    777 +	}
    778  
    779  	/* don't precache if we do not have the TRC curves */
    780  	if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
    781 @@ -1157,14 +1232,14 @@ qcms_transform* qcms_transform_create(
    782                  	return NULL;
    783              	}
    784  		if (precache) {
    785 -#ifdef X86
    786 +#if defined(SSE2_ENABLE) && defined(X86)
    787  		    if (sse_version_available() >= 2) {
    788  			    if (in_type == QCMS_DATA_RGB_8)
    789  				    transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
    790  			    else
    791  				    transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
    792  
    793 -#if !(defined(_MSC_VER) && defined(_M_AMD64))
    794 +#if defined(SSE2_ENABLE) && !(defined(_MSC_VER) && defined(_M_AMD64))
    795                      /* Microsoft Compiler for x64 doesn't support MMX.
    796                       * SSE code uses MMX so that we disable on x64 */
    797  		    } else
    798 @@ -1256,13 +1331,34 @@ qcms_transform* qcms_transform_create(
    799  	return transform;
    800  }
    801  
    802 -#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
    803 +/* __force_align_arg_pointer__ is an x86-only attribute, and gcc/clang warns on unused
    804 + * attributes. Don't use this on ARM or AMD64. __has_attribute can detect the presence
    805 + * of the attribute but is currently only supported by clang */
    806 +#if defined(__has_attribute)
    807 +#define HAS_FORCE_ALIGN_ARG_POINTER __has_attribute(__force_align_arg_pointer__)
    808 +#elif defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) && !defined(__arm__) && !defined(__mips__)
    809 +#define HAS_FORCE_ALIGN_ARG_POINTER 1
    810 +#else
    811 +#define HAS_FORCE_ALIGN_ARG_POINTER 0
    812 +#endif
    813 +
    814 +#if HAS_FORCE_ALIGN_ARG_POINTER
    815  /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
    816  __attribute__((__force_align_arg_pointer__))
    817  #endif
    818  void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
    819  {
    820 -	transform->transform_fn(transform, src, dest, length);
    821 +	static const struct _qcms_format_type output_rgbx = { 0, 2 };
    822 +
    823 +	transform->transform_fn(transform, src, dest, length, output_rgbx);
    824 +}
    825 +
    826 +void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type)
    827 +{
    828 +	static const struct _qcms_format_type output_rgbx = { 0, 2 };
    829 +	static const struct _qcms_format_type output_bgrx = { 2, 0 };
    830 +
    831 +	transform->transform_fn(transform, src, dest, length, type == QCMS_OUTPUT_BGRX ? output_bgrx : output_rgbx);
    832  }
    833  
    834  qcms_bool qcms_supports_iccv4;
    835 diff --git a/third_party/qcms/src/transform_util.c b/third_party/qcms/src/transform_util.c
    836 index e8447e5..f4338b2 100644
    837 --- a/third_party/qcms/src/transform_util.c
    838 +++ b/third_party/qcms/src/transform_util.c
    839 @@ -36,7 +36,7 @@
    840  
    841  /* value must be a value between 0 and 1 */
    842  //XXX: is the above a good restriction to have?
    843 -float lut_interp_linear(double value, uint16_t *table, int length)
    844 +float lut_interp_linear(double value, uint16_t *table, size_t length)
    845  {
    846  	int upper, lower;
    847  	value = value * (length - 1); // scale to length of the array
    848 @@ -49,11 +49,11 @@ float lut_interp_linear(double value, uint16_t *table, int length)
    849  }
    850  
    851  /* same as above but takes and returns a uint16_t value representing a range from 0..1 */
    852 -uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
    853 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length)
    854  {
    855  	/* Start scaling input_value to the length of the array: 65535*(length-1).
    856  	 * We'll divide out the 65535 next */
    857 -	uint32_t value = (input_value * (length - 1));
    858 +	uintptr_t value = (input_value * (length - 1));
    859  	uint32_t upper = (value + 65534) / 65535; /* equivalent to ceil(value/65535) */
    860  	uint32_t lower = value / 65535;           /* equivalent to floor(value/65535) */
    861  	/* interp is the distance from upper to value scaled to 0..65535 */
    862 @@ -67,11 +67,11 @@ uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
    863  /* same as above but takes an input_value from 0..PRECACHE_OUTPUT_MAX
    864   * and returns a uint8_t value representing a range from 0..1 */
    865  static
    866 -uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, int length)
    867 +uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, size_t length)
    868  {
    869  	/* Start scaling input_value to the length of the array: PRECACHE_OUTPUT_MAX*(length-1).
    870  	 * We'll divide out the PRECACHE_OUTPUT_MAX next */
    871 -	uint32_t value = (input_value * (length - 1));
    872 +	uintptr_t value = (input_value * (length - 1));
    873  
    874  	/* equivalent to ceil(value/PRECACHE_OUTPUT_MAX) */
    875  	uint32_t upper = (value + PRECACHE_OUTPUT_MAX-1) / PRECACHE_OUTPUT_MAX;
    876 @@ -91,7 +91,7 @@ uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table,
    877  
    878  /* value must be a value between 0 and 1 */
    879  //XXX: is the above a good restriction to have?
    880 -float lut_interp_linear_float(float value, float *table, int length)
    881 +float lut_interp_linear_float(float value, float *table, size_t length)
    882  {
    883          int upper, lower;
    884          value = value * (length - 1);
    885 @@ -235,6 +235,21 @@ float u8Fixed8Number_to_float(uint16_t x)
    886  	return x/256.;
    887  }
    888  
    889 +/* The SSE2 code uses min & max which let NaNs pass through.
    890 +   We want to try to prevent that here by ensuring that
    891 +   gamma table is within expected values. */
    892 +void validate_gamma_table(float gamma_table[256])
    893 +{
    894 +	int i;
    895 +	for (i = 0; i < 256; i++) {
    896 +		// Note: we test for "not in range" rather than "out of range"
    897 +		// so that NaNs, which fail every comparison, are caught too
    898 +		if (!(gamma_table[i] >= 0.f && gamma_table[i] <= 1.f)) {
    899 +			gamma_table[i] = 0.f;
    900 +		}
    901 +	}
    902 +}
    903 +
    904  float *build_input_gamma_table(struct curveType *TRC)
    905  {
    906  	float *gamma_table;
    907 @@ -254,7 +269,10 @@ float *build_input_gamma_table(struct curveType *TRC)
    908  			}
    909  		}
    910  	}
    911 -        return gamma_table;
    912 +
    913 +	validate_gamma_table(gamma_table);
    914 +
    915 +	return gamma_table;
    916  }
    917  
    918  struct matrix build_colorant_matrix(qcms_profile *p)
    919 @@ -390,7 +408,7 @@ uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int len
    920   which has an maximum error of about 9855 (pixel difference of ~38.346)
    921  
    922   For now, we punt the decision of output size to the caller. */
    923 -static uint16_t *invert_lut(uint16_t *table, int length, int out_length)
    924 +static uint16_t *invert_lut(uint16_t *table, int length, size_t out_length)
    925  {
    926          int i;
    927          /* for now we invert the lut by creating a lut of size out_length
    928 diff --git a/third_party/qcms/src/transform_util.h b/third_party/qcms/src/transform_util.h
    929 index 8f358a8..de465f4 100644
    930 --- a/third_party/qcms/src/transform_util.h
    931 +++ b/third_party/qcms/src/transform_util.h
    932 @@ -31,9 +31,9 @@
    933  //XXX: could use a bettername
    934  typedef uint16_t uint16_fract_t;
    935  
    936 -float lut_interp_linear(double value, uint16_t *table, int length);
    937 -float lut_interp_linear_float(float value, float *table, int length);
    938 -uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length);
    939 +float lut_interp_linear(double value, uint16_t *table, size_t length);
    940 +float lut_interp_linear_float(float value, float *table, size_t length);
    941 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length);
    942  
    943  
    944  static inline float lerp(float a, float b, float t)
    945