Home | History | Annotate | Download | only in qcms
      1 diff --git a/third_party/qcms/src/iccread.c b/third_party/qcms/src/iccread.c
      2 index 36b7011..69b7141 100644
      3 --- a/third_party/qcms/src/iccread.c
      4 +++ b/third_party/qcms/src/iccread.c
      5 @@ -266,7 +266,7 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
      6         if (profile->color_space != RGB_SIGNATURE)
      7  	       return false;
      8  
      9 -       if (profile->A2B0 || profile->B2A0)
     10 +       if (qcms_supports_iccv4 && (profile->A2B0 || profile->B2A0))
     11                 return false;
     12  
     13         rX = s15Fixed16Number_to_float(profile->redColorant.X);
     14 @@ -297,6 +297,11 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
     15         sum[1] = rY + gY + bY;
     16         sum[2] = rZ + gZ + bZ;
     17  
     18 +#if defined (_MSC_VER)
     19 +#pragma warning(push)
     20 +/* Disable double to float truncation warning 4305 */
     21 +#pragma warning(disable:4305)
     22 +#endif
     23         // Build our target vector (see mozilla bug 460629)
     24         target[0] = 0.96420;
     25         target[1] = 1.00000;
     26 @@ -310,6 +315,10 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
     27         tolerance[1] = 0.02;
     28         tolerance[2] = 0.04;
     29  
     30 +#if defined (_MSC_VER)
     31 +/* Restore warnings */
     32 +#pragma warning(pop)
     33 +#endif
     34         // Compare with our tolerance
     35         for (i = 0; i < 3; ++i) {
     36             if (!(((sum[i] - tolerance[i]) <= target[i]) &&
     37 @@ -331,6 +340,7 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
     38  #define TAG_A2B0 0x41324230
     39  #define TAG_B2A0 0x42324130
     40  #define TAG_CHAD 0x63686164
     41 +#define TAG_desc 0x64657363
     42  
     43  static struct tag *find_tag(struct tag_index index, uint32_t tag_id)
     44  {
     45 @@ -344,6 +354,47 @@ static struct tag *find_tag(struct tag_index index, uint32_t tag_id)
     46  	return tag;
     47  }
     48  
     49 +#define DESC_TYPE 0x64657363 // 'desc'
     50 +#define MLUC_TYPE 0x6d6c7563 // 'mluc'
     51 +/* Copies the text of tag `tag_id` (type 'desc' or 'mluc') into profile->description; returns true on success, otherwise calls invalid_source() and returns false. */
     52 +static bool read_tag_descType(qcms_profile *profile, struct mem_source *src, struct tag_index index, uint32_t tag_id)
     53 +{
     54 +	struct tag *tag = find_tag(index, tag_id);
     55 +	if (tag) {
     56 +		const uint32_t limit = sizeof profile->description; // capacity of the destination buffer, terminator included
     57 +		uint32_t offset = tag->offset;
     58 +		uint32_t type = read_u32(src, offset); // type signature stored at the start of the tag data
     59 +		uint32_t length = read_u32(src, offset+8); // 'desc': used as the byte count of the text; 'mluc': only tested for non-zero (presumably the record count -- confirm against the ICC spec)
     60 +		uint32_t i, description;
     61 +		if (length && type == MLUC_TYPE) {
     62 +			length = read_u32(src, offset+20); // replaces length with the value at +20 (presumably the first record's string length in bytes -- confirm)
     63 +			if (!length || (length & 1) || (read_u32(src, offset+12) != 12)) // reject empty or odd lengths, and a +12 field other than 12 (presumably the record size -- confirm)
     64 +				goto invalid_desc_tag;
     65 +			description = offset + read_u32(src, offset+24); // text position = tag offset + value stored at +24
     66 +			if (!src->valid) // stop here if src is no longer marked valid after the reads above
     67 +				goto invalid_desc_tag;
     68 +		} else if (length && type == DESC_TYPE) {
     69 +			description = offset + 12; // text starts 12 bytes into the tag data
     70 +		} else {
     71 +			goto invalid_desc_tag; // zero length field or a type other than 'mluc'/'desc'
     72 +		}
     73 +		if (length >= limit) // truncate so the terminator written below still fits in the buffer
     74 +			length = limit - 1;
     75 +		for (i = 0; i < length; ++i) // raw byte-for-byte copy; no character conversion is applied here
     76 +			profile->description[i] = read_u8(src, description+i);
     77 +		profile->description[length] = 0;
     78 +	} else { // find_tag() returned no tag for tag_id
     79 +		goto invalid_desc_tag;
     80 +	}
     81 +
     82 +	if (src->valid) // succeed only if src is still marked valid once all reads are done
     83 +		return true;
     84 +
     85 +invalid_desc_tag:
     86 +	invalid_source(src, "invalid description");
     87 +	return false;
     88 +}
     89 +
     90  #define XYZ_TYPE		0x58595a20 // 'XYZ '
     91  #define CURVE_TYPE		0x63757276 // 'curv'
     92  #define PARAMETRIC_CURVE_TYPE	0x70617261 // 'para'
     93 @@ -402,7 +453,7 @@ static struct XYZNumber read_tag_XYZType(struct mem_source *src, struct tag_inde
     94  // present that are not part of the tag_index.
     95  static struct curveType *read_curveType(struct mem_source *src, uint32_t offset, uint32_t *len)
     96  {
     97 -	static const size_t COUNT_TO_LENGTH[5] = {1, 3, 4, 5, 7};
     98 +	static const uint32_t COUNT_TO_LENGTH[5] = {1, 3, 4, 5, 7};
     99  	struct curveType *curve = NULL;
    100  	uint32_t type = read_u32(src, offset);
    101  	uint32_t count;
    102 @@ -484,19 +535,23 @@ static void read_nested_curveType(struct mem_source *src, struct curveType *(*cu
    103  	uint32_t channel_offset = 0;
    104  	int i;
    105  	for (i = 0; i < num_channels; i++) {
    106 -		uint32_t tag_len;
    107 +		uint32_t tag_len = ~0;
    108  
    109  		(*curveArray)[i] = read_curveType(src, curve_offset + channel_offset, &tag_len);
    110  		if (!(*curveArray)[i]) {
    111  			invalid_source(src, "invalid nested curveType curve");
    112  		}
    113  
    114 +		if (tag_len == ~0) {
    115 +			invalid_source(src, "invalid nested curveType tag length");
    116 +			return;
    117 +		}
    118 +
    119  		channel_offset += tag_len;
    120  		// 4 byte aligned
    121  		if ((tag_len % 4) != 0)
    122  			channel_offset += 4 - (tag_len % 4);
    123  	}
    124 -
    125  }
    126  
    127  static void mAB_release(struct lutmABType *lut)
    128 @@ -657,7 +712,7 @@ static struct lutType *read_tag_lutType(struct mem_source *src, struct tag_index
    129  	uint16_t num_input_table_entries;
    130  	uint16_t num_output_table_entries;
    131  	uint8_t in_chan, grid_points, out_chan;
    132 -	uint32_t clut_offset, output_offset;
    133 +	size_t clut_offset, output_offset;
    134  	uint32_t clut_size;
    135  	size_t entry_size;
    136  	struct lutType *lut;
    137 @@ -979,6 +1034,9 @@ qcms_profile* qcms_profile_sRGB(void)
    138  		return NO_MEM_PROFILE;
    139  
    140  	profile = qcms_profile_create_rgb_with_table(D65, Rec709Primaries, table, 1024);
    141 +	if (profile)
    142 +		strcpy(profile->description, "sRGB IEC61966-2.1");
    143 +
    144  	free(table);
    145  	return profile;
    146  }
    147 @@ -997,6 +1055,9 @@ qcms_profile* qcms_profile_from_memory(const void *mem, size_t size)
    148  	source.size = size;
    149  	source.valid = true;
    150  
    151 +	if (size < 4)
    152 +		return INVALID_PROFILE;
    153 +
    154  	length = read_u32(src, 0);
    155  	if (length <= size) {
    156  		// shrink the area that we can read if appropriate
    157 @@ -1028,6 +1089,9 @@ qcms_profile* qcms_profile_from_memory(const void *mem, size_t size)
    158  	if (!src->valid || !index.tags)
    159  		goto invalid_tag_table;
    160  
    161 +	if (!read_tag_descType(profile, src, index, TAG_desc))
    162 +		goto invalid_tag_table;
    163 +
    164  	if (find_tag(index, TAG_CHAD)) {
    165  		profile->chromaticAdaption = read_tag_s15Fixed16ArrayType(src, index, TAG_CHAD);
    166  	} else {
    167 @@ -1098,6 +1162,11 @@ invalid_profile:
    168  	return INVALID_PROFILE;
    169  }
    170  
    171 +qcms_bool qcms_profile_match(qcms_profile *p1, qcms_profile *p2)
    172 +{ // Returns nonzero when the two profiles hold byte-identical description buffers; neither pointer is checked for NULL.
    173 +    return memcmp(p1->description, p2->description, sizeof p1->description) == 0; // compares the whole array, so bytes after the first NUL must match as well
    174 +}
    175 +
    176  qcms_intent qcms_profile_get_rendering_intent(qcms_profile *profile)
    177  {
    178  	return profile->rendering_intent;
    179 diff --git a/third_party/qcms/src/qcms.h b/third_party/qcms/src/qcms.h
    180 index 7d83623..e59528a 100644
    181 --- a/third_party/qcms/src/qcms.h
    182 +++ b/third_party/qcms/src/qcms.h
    183 @@ -40,6 +40,12 @@ sale, use or other dealings in this Software without written
    184  authorization from SunSoft Inc. 
    185  ******************************************************************/
    186  
    187 +/*
    188 + * QCMS, in general, is not threadsafe. However, it should be safe to create
    189 + * profile and transformation objects on different threads, so long as you
    190 + * don't use the same objects on different threads at the same time.
    191 + */
    192 +
    193  /* 
    194   * Color Space Signatures
    195   * Note that only icSigXYZData and icSigLabData are valid
    196 @@ -102,6 +108,12 @@ typedef enum {
    197  	QCMS_DATA_GRAYA_8
    198  } qcms_data_type;
    199  
    200 +/* Format of the output data for qcms_transform_data_type() */
    201 +typedef enum {
    202 +	QCMS_OUTPUT_RGBX, /* presumably red in byte 0 and blue in byte 2 of each output pixel -- TODO confirm in qcms_transform_data_type() */
    203 +	QCMS_OUTPUT_BGRX  /* presumably blue in byte 0 and red in byte 2 of each output pixel -- TODO confirm in qcms_transform_data_type() */
    204 +} qcms_output_type;
    205 +
    206  /* the names for the following two types are sort of ugly */
    207  typedef struct
    208  {
    209 @@ -136,6 +148,8 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile);
    210  qcms_intent qcms_profile_get_rendering_intent(qcms_profile *profile);
    211  icColorSpaceSignature qcms_profile_get_color_space(qcms_profile *profile);
    212  
    213 +qcms_bool qcms_profile_match(qcms_profile *p1, qcms_profile *p2);
    214 +
    215  void qcms_profile_precache_output_transform(qcms_profile *profile);
    216  
    217  qcms_transform* qcms_transform_create(
    218 @@ -146,6 +160,7 @@ qcms_transform* qcms_transform_create(
    219  void qcms_transform_release(qcms_transform *);
    220  
    221  void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length);
    222 +void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type);
    223  
    224  void qcms_enable_iccv4();
    225  
    226 diff --git a/third_party/qcms/src/qcmsint.h b/third_party/qcms/src/qcmsint.h
    227 index 53a3420..4116ed5 100644
    228 --- a/third_party/qcms/src/qcmsint.h
    229 +++ b/third_party/qcms/src/qcmsint.h
    230 @@ -45,6 +45,11 @@ struct precache_output
    231  #define ALIGN __attribute__(( aligned (16) ))
    232  #endif
    233  
    234 +typedef struct _qcms_format_type { /* Byte positions of the red and blue channels within one output pixel; passed by value to every transform_fn. */
    235 +	int r; /* index the transform functions use for the red write (dest[r_out]) */
    236 +	int b; /* index the transform functions use for the blue write (dest[b_out]); green is always written to dest[1] */
    237 +} qcms_format_type;
    238 +
    239  struct _qcms_transform {
    240  	float ALIGN matrix[3][4];
    241  	float *input_gamma_table_r;
    242 @@ -88,7 +93,7 @@ struct _qcms_transform {
    243  	struct precache_output *output_table_g;
    244  	struct precache_output *output_table_b;
    245  
    246 -	void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length);
    247 +	void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, struct _qcms_format_type output_format);
    248  };
    249  
    250  struct matrix {
    251 @@ -225,6 +230,7 @@ struct tag_value {
    252  #define LAB_SIGNATURE  0x4C616220
    253  
    254  struct _qcms_profile {
    255 +	char description[64];
    256  	uint32_t class;
    257  	uint32_t color_space;
    258  	uint32_t pcs;
    259 @@ -280,18 +286,40 @@ qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm
    260  void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    261                                            unsigned char *src,
    262                                            unsigned char *dest,
    263 -                                          size_t length);
    264 +                                          size_t length,
    265 +                                          qcms_format_type output_format);
    266  void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    267                                            unsigned char *src,
    268                                            unsigned char *dest,
    269 -                                          size_t length);
    270 +                                          size_t length,
    271 +                                          qcms_format_type output_format);
    272  void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    273                                            unsigned char *src,
    274                                            unsigned char *dest,
    275 -                                          size_t length);
    276 +                                          size_t length,
    277 +                                          qcms_format_type output_format);
    278  void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    279                                            unsigned char *src,
    280                                            unsigned char *dest,
    281 -                                          size_t length);
    282 +                                          size_t length,
    283 +                                          qcms_format_type output_format);
    284  
    285  extern qcms_bool qcms_supports_iccv4;
    286 +
    287 +
    288 +#ifdef _MSC_VER
    289 +/* MSVC: declare the interlocked functions by hand and request their intrinsic forms via #pragma intrinsic. */
    290 +long __cdecl _InterlockedIncrement(long volatile *);
    291 +long __cdecl _InterlockedDecrement(long volatile *);
    292 +#pragma intrinsic(_InterlockedIncrement)
    293 +#pragma intrinsic(_InterlockedDecrement)
    294 +/* Atomic +1 / -1 on lvalue x. NOTE(review): x expands unparenthesized after '&', so pass a plain lvalue; the cast assumes x has the size of long -- confirm at call sites. */
    295 +#define qcms_atomic_increment(x) _InterlockedIncrement((long volatile *)&x)
    296 +#define qcms_atomic_decrement(x) _InterlockedDecrement((long volatile*)&x)
    297 +/* Non-MSVC builds take the branch below. */
    298 +#else
    299 +/* Same two macros built on the __sync_*_and_fetch builtins (presumably GCC-compatible compilers only -- confirm for each toolchain). NOTE(review): x is unparenthesized here too. */
    300 +#define qcms_atomic_increment(x) __sync_add_and_fetch(&x, 1)
    301 +#define qcms_atomic_decrement(x) __sync_sub_and_fetch(&x, 1)
    302 +/* Both variants are expected to evaluate to the updated value -- verify against the intrinsic/builtin documentation. */
    303 +#endif
    304 diff --git a/third_party/qcms/src/qcmstypes.h b/third_party/qcms/src/qcmstypes.h
    305 index 56d8de3..d58f691 100644
    306 --- a/third_party/qcms/src/qcmstypes.h
    307 +++ b/third_party/qcms/src/qcmstypes.h
    308 @@ -22,37 +22,6 @@
    309  #ifndef QCMS_TYPES_H
    310  #define QCMS_TYPES_H
    311  
    312 -#ifdef MOZ_QCMS
    313 -
    314 -#include "prtypes.h"
    315 -
    316 -/* prtypes.h defines IS_LITTLE_ENDIAN and IS_BIG ENDIAN */
    317 -
    318 -#if defined (__SVR4) && defined (__sun)
    319 -/* int_types.h gets included somehow, so avoid redefining the types differently */
    320 -#include <sys/int_types.h>
    321 -#elif defined (_AIX)
    322 -#include <sys/types.h>
    323 -#elif !defined(ANDROID) && !defined(__OpenBSD__)
    324 -typedef PRInt8 int8_t;
    325 -typedef PRUint8 uint8_t;
    326 -typedef PRInt16 int16_t;
    327 -typedef PRUint16 uint16_t;
    328 -typedef PRInt32 int32_t;
    329 -typedef PRUint32 uint32_t;
    330 -typedef PRInt64 int64_t;
    331 -typedef PRUint64 uint64_t;
    332 -
    333 -#ifdef __OS2__
    334 -/* OS/2's stdlib typdefs uintptr_t. So we'll just include that so we don't collide */
    335 -#include <stdlib.h>
    336 -#elif !defined(__intptr_t_defined) && !defined(_UINTPTR_T_DEFINED)
    337 -typedef PRUptrdiff uintptr_t;
    338 -#endif
    339 -#endif
    340 -
    341 -#else // MOZ_QCMS
    342 -
    343  #if BYTE_ORDER == LITTLE_ENDIAN
    344  #define IS_LITTLE_ENDIAN
    345  #elif BYTE_ORDER == BIG_ENDIAN
    346 @@ -75,7 +44,7 @@ typedef PRUptrdiff uintptr_t;
    347  
    348  #if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__)
    349  #  include <inttypes.h>
    350 -#elif defined (_MSC_VER)
    351 +#elif defined (_MSC_VER) && _MSC_VER < 1600
    352  typedef __int8 int8_t;
    353  typedef unsigned __int8 uint8_t;
    354  typedef __int16 int16_t;
    355 @@ -87,7 +56,12 @@ typedef unsigned __int64 uint64_t;
    356  #ifdef _WIN64
    357  typedef unsigned __int64 uintptr_t;
    358  #else
    359 +#pragma warning(push)
    360 +/* Disable benign redefinition of type warning 4142 */
    361 +#pragma warning(disable:4142)
    362  typedef unsigned long uintptr_t;
    363 +/* Restore warnings */
    364 +#pragma warning(pop)
    365  #endif
    366  
    367  #elif defined (_AIX)
    368 @@ -96,8 +70,6 @@ typedef unsigned long uintptr_t;
    369  #  include <stdint.h>
    370  #endif
    371  
    372 -#endif
    373 -
    374  typedef qcms_bool bool;
    375  #define true 1
    376  #define false 0
    377 diff --git a/third_party/qcms/src/transform-sse1.c b/third_party/qcms/src/transform-sse1.c
    378 index 2f34db5..aaee1bf 100644
    379 --- a/third_party/qcms/src/transform-sse1.c
    380 +++ b/third_party/qcms/src/transform-sse1.c
    381 @@ -34,7 +34,8 @@ static const ALIGN float clampMaxValueX4[4] =
    382  void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    383                                            unsigned char *src,
    384                                            unsigned char *dest,
    385 -                                          size_t length)
    386 +                                          size_t length,
    387 +                                          qcms_format_type output_format)
    388  {
    389      unsigned int i;
    390      float (*mat)[4] = transform->matrix;
    391 @@ -70,6 +71,8 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    392  
    393      /* working variables */
    394      __m128 vec_r, vec_g, vec_b, result;
    395 +    const int r_out = output_format.r;
    396 +    const int b_out = output_format.b;
    397  
    398      /* CYA */
    399      if (!length)
    400 @@ -116,9 +119,9 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    401          src += 3;
    402  
    403          /* use calc'd indices to output RGB values */
    404 -        dest[0] = otdata_r[output[0]];
    405 -        dest[1] = otdata_g[output[1]];
    406 -        dest[2] = otdata_b[output[2]];
    407 +        dest[r_out] = otdata_r[output[0]];
    408 +        dest[1]     = otdata_g[output[1]];
    409 +        dest[b_out] = otdata_b[output[2]];
    410          dest += 3;
    411      }
    412  
    413 @@ -141,9 +144,9 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    414      result = _mm_movehl_ps(result, result);
    415      *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
    416  
    417 -    dest[0] = otdata_r[output[0]];
    418 -    dest[1] = otdata_g[output[1]];
    419 -    dest[2] = otdata_b[output[2]];
    420 +    dest[r_out] = otdata_r[output[0]];
    421 +    dest[1]     = otdata_g[output[1]];
    422 +    dest[b_out] = otdata_b[output[2]];
    423  
    424      _mm_empty();
    425  }
    426 @@ -151,7 +154,8 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    427  void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    428                                             unsigned char *src,
    429                                             unsigned char *dest,
    430 -                                           size_t length)
    431 +                                           size_t length,
    432 +                                           qcms_format_type output_format)
    433  {
    434      unsigned int i;
    435      float (*mat)[4] = transform->matrix;
    436 @@ -187,6 +191,8 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    437  
    438      /* working variables */
    439      __m128 vec_r, vec_g, vec_b, result;
    440 +    const int r_out = output_format.r;
    441 +    const int b_out = output_format.b;
    442      unsigned char alpha;
    443  
    444      /* CYA */
    445 @@ -239,9 +245,9 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    446          src += 4;
    447  
    448          /* use calc'd indices to output RGB values */
    449 -        dest[0] = otdata_r[output[0]];
    450 -        dest[1] = otdata_g[output[1]];
    451 -        dest[2] = otdata_b[output[2]];
    452 +        dest[r_out] = otdata_r[output[0]];
    453 +        dest[1]     = otdata_g[output[1]];
    454 +        dest[b_out] = otdata_b[output[2]];
    455          dest += 4;
    456      }
    457  
    458 @@ -266,9 +272,9 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    459      result = _mm_movehl_ps(result, result);
    460      *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
    461  
    462 -    dest[0] = otdata_r[output[0]];
    463 -    dest[1] = otdata_g[output[1]];
    464 -    dest[2] = otdata_b[output[2]];
    465 +    dest[r_out] = otdata_r[output[0]];
    466 +    dest[1]     = otdata_g[output[1]];
    467 +    dest[b_out] = otdata_b[output[2]];
    468  
    469      _mm_empty();
    470  }
    471 diff --git a/third_party/qcms/src/transform-sse2.c b/third_party/qcms/src/transform-sse2.c
    472 index 6a5faf9..fa7f2d1 100644
    473 --- a/third_party/qcms/src/transform-sse2.c
    474 +++ b/third_party/qcms/src/transform-sse2.c
    475 @@ -34,7 +34,8 @@ static const ALIGN float clampMaxValueX4[4] =
    476  void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    477                                            unsigned char *src,
    478                                            unsigned char *dest,
    479 -                                          size_t length)
    480 +                                          size_t length,
    481 +                                          qcms_format_type output_format)
    482  {
    483      unsigned int i;
    484      float (*mat)[4] = transform->matrix;
    485 @@ -70,6 +71,8 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    486  
    487      /* working variables */
    488      __m128 vec_r, vec_g, vec_b, result;
    489 +    const int r_out = output_format.r;
    490 +    const int b_out = output_format.b;
    491  
    492      /* CYA */
    493      if (!length)
    494 @@ -114,9 +117,9 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    495          src += 3;
    496  
    497          /* use calc'd indices to output RGB values */
    498 -        dest[0] = otdata_r[output[0]];
    499 -        dest[1] = otdata_g[output[1]];
    500 -        dest[2] = otdata_b[output[2]];
    501 +        dest[r_out] = otdata_r[output[0]];
    502 +        dest[1]     = otdata_g[output[1]];
    503 +        dest[b_out] = otdata_b[output[2]];
    504          dest += 3;
    505      }
    506  
    507 @@ -137,15 +140,16 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    508  
    509      _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
    510  
    511 -    dest[0] = otdata_r[output[0]];
    512 -    dest[1] = otdata_g[output[1]];
    513 -    dest[2] = otdata_b[output[2]];
    514 +    dest[r_out] = otdata_r[output[0]];
    515 +    dest[1]     = otdata_g[output[1]];
    516 +    dest[b_out] = otdata_b[output[2]];
    517  }
    518  
    519  void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    520                                             unsigned char *src,
    521                                             unsigned char *dest,
    522 -                                           size_t length)
    523 +                                           size_t length,
    524 +                                           qcms_format_type output_format)
    525  {
    526      unsigned int i;
    527      float (*mat)[4] = transform->matrix;
    528 @@ -181,6 +185,8 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    529  
    530      /* working variables */
    531      __m128 vec_r, vec_g, vec_b, result;
    532 +    const int r_out = output_format.r;
    533 +    const int b_out = output_format.b;
    534      unsigned char alpha;
    535  
    536      /* CYA */
    537 @@ -231,9 +237,9 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    538          src += 4;
    539  
    540          /* use calc'd indices to output RGB values */
    541 -        dest[0] = otdata_r[output[0]];
    542 -        dest[1] = otdata_g[output[1]];
    543 -        dest[2] = otdata_b[output[2]];
    544 +        dest[r_out] = otdata_r[output[0]];
    545 +        dest[1]     = otdata_g[output[1]];
    546 +        dest[b_out] = otdata_b[output[2]];
    547          dest += 4;
    548      }
    549  
    550 @@ -256,7 +262,7 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    551  
    552      _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
    553  
    554 -    dest[0] = otdata_r[output[0]];
    555 -    dest[1] = otdata_g[output[1]];
    556 -    dest[2] = otdata_b[output[2]];
    557 +    dest[r_out] = otdata_r[output[0]];
    558 +    dest[1]     = otdata_g[output[1]];
    559 +    dest[b_out] = otdata_b[output[2]];
    560  }
    561 diff --git a/third_party/qcms/src/transform.c b/third_party/qcms/src/transform.c
    562 index 9a6562b..08db142 100644
    563 --- a/third_party/qcms/src/transform.c
    564 +++ b/third_party/qcms/src/transform.c
    565 @@ -181,11 +181,20 @@ compute_chromatic_adaption(struct CIE_XYZ source_white_point,
    566  static struct matrix
    567  adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
    568  {
    569 +#if defined (_MSC_VER)
    570 +#pragma warning(push)
    571 +/* Disable double to float truncation warning 4305 */
    572 +#pragma warning(disable:4305)
    573 +#endif
    574  	struct matrix lam_rigg = {{ // Bradford matrix
    575  	                         {  0.8951,  0.2664, -0.1614 },
    576  	                         { -0.7502,  1.7135,  0.0367 },
    577  	                         {  0.0389, -0.0685,  1.0296 }
    578  	                         }};
    579 +#if defined (_MSC_VER)
    580 +/* Restore warnings */
    581 +#pragma warning(pop)
    582 +#endif
    583  	return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
    584  }
    585  
    586 @@ -230,8 +239,11 @@ qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm
    587  }
    588  
    589  #if 0
    590 -static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    591 +static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    592  {
    593 +	const int r_out = output_format.r;
    594 +	const int b_out = output_format.b;
    595 +
    596  	int i;
    597  	float (*mat)[4] = transform->matrix;
    598  	for (i=0; i<length; i++) {
    599 @@ -251,15 +263,19 @@ static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned
    600  		float out_device_g = pow(out_linear_g, transform->out_gamma_g);
    601  		float out_device_b = pow(out_linear_b, transform->out_gamma_b);
    602  
    603 -		*dest++ = clamp_u8(255*out_device_r);
    604 -		*dest++ = clamp_u8(255*out_device_g);
    605 -		*dest++ = clamp_u8(255*out_device_b);
    606 +		dest[r_out] = clamp_u8(out_device_r*255);
    607 +		dest[1]     = clamp_u8(out_device_g*255);
    608 +		dest[b_out] = clamp_u8(out_device_b*255);
    609 +		dest += 3;
    610  	}
    611  }
    612  #endif
    613  
    614 -static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    615 +static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    616  {
    617 +	const int r_out = output_format.r;
    618 +	const int b_out = output_format.b;
    619 +
    620  	unsigned int i;
    621  	for (i = 0; i < length; i++) {
    622  		float out_device_r, out_device_g, out_device_b;
    623 @@ -267,13 +283,14 @@ static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned
    624  
    625  		float linear = transform->input_gamma_table_gray[device];
    626  
    627 -                out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
    628 +		out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
    629  		out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
    630  		out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
    631  
    632 -		*dest++ = clamp_u8(out_device_r*255);
    633 -		*dest++ = clamp_u8(out_device_g*255);
    634 -		*dest++ = clamp_u8(out_device_b*255);
    635 +		dest[r_out] = clamp_u8(out_device_r*255);
    636 +		dest[1]     = clamp_u8(out_device_g*255);
    637 +		dest[b_out] = clamp_u8(out_device_b*255);
    638 +		dest += 3;
    639  	}
    640  }
    641  
    642 @@ -283,8 +300,11 @@ static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned
    643  	See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
    644  */
    645  
    646 -static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    647 +static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    648  {
    649 +	const int r_out = output_format.r;
    650 +	const int b_out = output_format.b;
    651 +
    652  	unsigned int i;
    653  	for (i = 0; i < length; i++) {
    654  		float out_device_r, out_device_g, out_device_b;
    655 @@ -293,20 +313,24 @@ static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigne
    656  
    657  		float linear = transform->input_gamma_table_gray[device];
    658  
    659 -                out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
    660 +		out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
    661  		out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
    662  		out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
    663  
    664 -		*dest++ = clamp_u8(out_device_r*255);
    665 -		*dest++ = clamp_u8(out_device_g*255);
    666 -		*dest++ = clamp_u8(out_device_b*255);
    667 -		*dest++ = alpha;
    668 +		dest[r_out] = clamp_u8(out_device_r*255);
    669 +		dest[1]     = clamp_u8(out_device_g*255);
    670 +		dest[b_out] = clamp_u8(out_device_b*255);
    671 +		dest[3]     = alpha;
    672 +		dest += 4;
    673  	}
    674  }
    675  
    676  
    677 -static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    678 +static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    679  {
    680 +	const int r_out = output_format.r;
    681 +	const int b_out = output_format.b;
    682 +
    683  	unsigned int i;
    684  	for (i = 0; i < length; i++) {
    685  		unsigned char device = *src++;
    686 @@ -317,14 +341,19 @@ static void qcms_transform_data_gray_out_precache(qcms_transform *transform, uns
    687  		/* we could round here... */
    688  		gray = linear * PRECACHE_OUTPUT_MAX;
    689  
    690 -		*dest++ = transform->output_table_r->data[gray];
    691 -		*dest++ = transform->output_table_g->data[gray];
    692 -		*dest++ = transform->output_table_b->data[gray];
    693 +		dest[r_out] = transform->output_table_r->data[gray];
    694 +		dest[1]     = transform->output_table_g->data[gray];
    695 +		dest[b_out] = transform->output_table_b->data[gray];
    696 +		dest += 3;
    697  	}
    698  }
    699  
    700 -static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    701 +
    702 +static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    703  {
    704 +	const int r_out = output_format.r;
    705 +	const int b_out = output_format.b;
    706 +
    707  	unsigned int i;
    708  	for (i = 0; i < length; i++) {
    709  		unsigned char device = *src++;
    710 @@ -336,15 +365,19 @@ static void qcms_transform_data_graya_out_precache(qcms_transform *transform, un
    711  		/* we could round here... */
    712  		gray = linear * PRECACHE_OUTPUT_MAX;
    713  
    714 -		*dest++ = transform->output_table_r->data[gray];
    715 -		*dest++ = transform->output_table_g->data[gray];
    716 -		*dest++ = transform->output_table_b->data[gray];
    717 -		*dest++ = alpha;
    718 +		dest[r_out] = transform->output_table_r->data[gray];
    719 +		dest[1]     = transform->output_table_g->data[gray];
    720 +		dest[b_out] = transform->output_table_b->data[gray];
    721 +		dest[3]     = alpha;
    722 +		dest += 4;
    723  	}
    724  }
    725  
    726 -static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    727 +static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    728  {
    729 +	const int r_out = output_format.r;
    730 +	const int b_out = output_format.b;
    731 +
    732  	unsigned int i;
    733  	float (*mat)[4] = transform->matrix;
    734  	for (i = 0; i < length; i++) {
    735 @@ -370,14 +403,18 @@ static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform,
    736  		g = out_linear_g * PRECACHE_OUTPUT_MAX;
    737  		b = out_linear_b * PRECACHE_OUTPUT_MAX;
    738  
    739 -		*dest++ = transform->output_table_r->data[r];
    740 -		*dest++ = transform->output_table_g->data[g];
    741 -		*dest++ = transform->output_table_b->data[b];
    742 +		dest[r_out] = transform->output_table_r->data[r];
    743 +		dest[1]     = transform->output_table_g->data[g];
    744 +		dest[b_out] = transform->output_table_b->data[b];
    745 +		dest += 3;
    746  	}
    747  }
    748  
    749 -static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    750 +static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    751  {
    752 +	const int r_out = output_format.r;
    753 +	const int b_out = output_format.b;
    754 +
    755  	unsigned int i;
    756  	float (*mat)[4] = transform->matrix;
    757  	for (i = 0; i < length; i++) {
    758 @@ -404,16 +441,21 @@ static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform,
    759  		g = out_linear_g * PRECACHE_OUTPUT_MAX;
    760  		b = out_linear_b * PRECACHE_OUTPUT_MAX;
    761  
    762 -		*dest++ = transform->output_table_r->data[r];
    763 -		*dest++ = transform->output_table_g->data[g];
    764 -		*dest++ = transform->output_table_b->data[b];
    765 -		*dest++ = alpha;
    766 +		dest[r_out] = transform->output_table_r->data[r];
    767 +		dest[1]     = transform->output_table_g->data[g];
    768 +		dest[b_out] = transform->output_table_b->data[b];
    769 +		dest[3]     = alpha;
    770 +		dest += 4;
    771  	}
    772  }
    773  
    774  // Not used
    775  /* 
    776 -static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
    777 +static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    778 +{
    779 +	const int r_out = output_format.r;
    780 +	const int b_out = output_format.b;
    781 +
    782  	unsigned int i;
    783  	int xy_len = 1;
    784  	int x_len = transform->grid_size;
    785 @@ -462,15 +504,20 @@ static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *s
    786  		float b_y2 = lerp(b_x3, b_x4, y_d);
    787  		float clut_b = lerp(b_y1, b_y2, z_d);
    788  
    789 -		*dest++ = clamp_u8(clut_r*255.0f);
    790 -		*dest++ = clamp_u8(clut_g*255.0f);
    791 -		*dest++ = clamp_u8(clut_b*255.0f);
    792 -	}	
    793 +		dest[r_out] = clamp_u8(clut_r*255.0f);
    794 +		dest[1]     = clamp_u8(clut_g*255.0f);
    795 +		dest[b_out] = clamp_u8(clut_b*255.0f);
    796 +		dest += 3;
    797 +	}
    798  }
    799  */
    800  
    801  // Using lcms' tetra interpolation algorithm.
    802 -static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
    803 +static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    804 +{
    805 +	const int r_out = output_format.r;
    806 +	const int b_out = output_format.b;
    807 +
    808  	unsigned int i;
    809  	int xy_len = 1;
    810  	int x_len = transform->grid_size;
    811 @@ -577,15 +624,20 @@ static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsig
    812  		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
    813  		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
    814  
    815 -		*dest++ = clamp_u8(clut_r*255.0f);
    816 -		*dest++ = clamp_u8(clut_g*255.0f);
    817 -		*dest++ = clamp_u8(clut_b*255.0f);
    818 -		*dest++ = in_a;
    819 -	}	
    820 +		dest[r_out] = clamp_u8(clut_r*255.0f);
    821 +		dest[1]     = clamp_u8(clut_g*255.0f);
    822 +		dest[b_out] = clamp_u8(clut_b*255.0f);
    823 +		dest[3]     = in_a;
    824 +		dest += 4;
    825 +	}
    826  }
    827  
    828  // Using lcms' tetra interpolation code.
    829 -static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
    830 +static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    831 +{
    832 +	const int r_out = output_format.r;
    833 +	const int b_out = output_format.b;
    834 +
    835  	unsigned int i;
    836  	int xy_len = 1;
    837  	int x_len = transform->grid_size;
    838 @@ -691,14 +743,18 @@ static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned c
    839  		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
    840  		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
    841  
    842 -		*dest++ = clamp_u8(clut_r*255.0f);
    843 -		*dest++ = clamp_u8(clut_g*255.0f);
    844 -		*dest++ = clamp_u8(clut_b*255.0f);
    845 -	}	
    846 +		dest[r_out] = clamp_u8(clut_r*255.0f);
    847 +		dest[1]     = clamp_u8(clut_g*255.0f);
    848 +		dest[b_out] = clamp_u8(clut_b*255.0f);
    849 +		dest += 3;
    850 +	}
    851  }
    852  
    853 -static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    854 +static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    855  {
    856 +	const int r_out = output_format.r;
    857 +	const int b_out = output_format.b;
    858 +
    859  	unsigned int i;
    860  	float (*mat)[4] = transform->matrix;
    861  	for (i = 0; i < length; i++) {
    862 @@ -726,14 +782,18 @@ static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned
    863  		out_device_b = lut_interp_linear(out_linear_b, 
    864  				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
    865  
    866 -		*dest++ = clamp_u8(out_device_r*255);
    867 -		*dest++ = clamp_u8(out_device_g*255);
    868 -		*dest++ = clamp_u8(out_device_b*255);
    869 +		dest[r_out] = clamp_u8(out_device_r*255);
    870 +		dest[1]     = clamp_u8(out_device_g*255);
    871 +		dest[b_out] = clamp_u8(out_device_b*255);
    872 +		dest += 3;
    873  	}
    874  }
    875  
    876 -static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    877 +static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    878  {
    879 +	const int r_out = output_format.r;
    880 +	const int b_out = output_format.b;
    881 +
    882  	unsigned int i;
    883  	float (*mat)[4] = transform->matrix;
    884  	for (i = 0; i < length; i++) {
    885 @@ -762,16 +822,20 @@ static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned
    886  		out_device_b = lut_interp_linear(out_linear_b, 
    887  				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
    888  
    889 -		*dest++ = clamp_u8(out_device_r*255);
    890 -		*dest++ = clamp_u8(out_device_g*255);
    891 -		*dest++ = clamp_u8(out_device_b*255);
    892 -		*dest++ = alpha;
    893 +		dest[r_out] = clamp_u8(out_device_r*255);
    894 +		dest[1]     = clamp_u8(out_device_g*255);
    895 +		dest[b_out] = clamp_u8(out_device_b*255);
    896 +		dest[3]     = alpha;
    897 +		dest += 4;
    898  	}
    899  }
    900  
    901  #if 0
    902 -static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    903 +static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    904  {
    905 +	const int r_out = output_format.r;
    906 +	const int b_out = output_format.b;
    907 +
    908  	int i;
    909  	float (*mat)[4] = transform->matrix;
    910  	for (i = 0; i < length; i++) {
    911 @@ -787,16 +851,25 @@ static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsign
    912  		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
    913  		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
    914  
    915 -		*dest++ = clamp_u8(out_linear_r*255);
    916 -		*dest++ = clamp_u8(out_linear_g*255);
    917 -		*dest++ = clamp_u8(out_linear_b*255);
    918 +		dest[r_out] = clamp_u8(out_linear_r*255);
    919 +		dest[1]     = clamp_u8(out_linear_g*255);
    920 +		dest[b_out] = clamp_u8(out_linear_b*255);
    921 +		dest += 3;
    922  	}
    923  }
    924  #endif
    925  
    926 +/*
    927 + * If users create and destroy objects on different threads, even if the same
    928 + * objects aren't used on different threads at the same time, we can still run
    929 + * into trouble with refcounts if they aren't atomic.
    930 + *
    931 + * This can lead to us prematurely deleting the precache if threads get unlucky
    932 + * and write the wrong value to the ref count.
    933 + */
    934  static struct precache_output *precache_reference(struct precache_output *p)
    935  {
    936 -	p->ref_count++;
    937 +	qcms_atomic_increment(p->ref_count);
    938  	return p;
    939  }
    940  
    941 @@ -810,12 +883,12 @@ static struct precache_output *precache_create()
    942  
    943  void precache_release(struct precache_output *p)
    944  {
    945 -	if (--p->ref_count == 0) {
    946 +	if (qcms_atomic_decrement(p->ref_count) == 0) {
    947  		free(p);
    948  	}
    949  }
    950  
    951 -#ifdef HAS_POSIX_MEMALIGN
    952 +#ifdef HAVE_POSIX_MEMALIGN
    953  static qcms_transform *transform_alloc(void)
    954  {
    955  	qcms_transform *t;
    956 @@ -994,13 +1067,15 @@ void qcms_profile_precache_output_transform(qcms_profile *profile)
    957  	if (profile->color_space != RGB_SIGNATURE)
    958  		return;
    959  
    960 -	/* don't precache since we will use the B2A LUT */
    961 -	if (profile->B2A0)
    962 -		return;
    963 +	if (qcms_supports_iccv4) {
    964 +		/* don't precache since we will use the B2A LUT */
    965 +		if (profile->B2A0)
    966 +			return;
    967  
    968 -	/* don't precache since we will use the mBA LUT */
    969 -	if (profile->mBA)
    970 -		return;
    971 +		/* don't precache since we will use the mBA LUT */
    972 +		if (profile->mBA)
    973 +			return;
    974 +	}
    975  
    976  	/* don't precache if we do not have the TRC curves */
    977  	if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
    978 @@ -1078,7 +1153,8 @@ qcms_transform* qcms_transform_precacheLUT_float(qcms_transform *transform, qcms
    979  	//XXX: qcms_modular_transform_data may return either the src or dest buffer. If so it must not be free-ed
    980  	if (src && lut != src) {
    981  		free(src);
    982 -	} else if (dest && lut != src) {
    983 +	}
    984 +	if (dest && lut != dest) {
    985  		free(dest);
    986  	}
    987  
    988 @@ -1157,14 +1233,14 @@ qcms_transform* qcms_transform_create(
    989                  	return NULL;
    990              	}
    991  		if (precache) {
    992 -#ifdef X86
    993 +#if defined(SSE2_ENABLE) && defined(X86)
    994  		    if (sse_version_available() >= 2) {
    995  			    if (in_type == QCMS_DATA_RGB_8)
    996  				    transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
    997  			    else
    998  				    transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
    999  
   1000 -#if !(defined(_MSC_VER) && defined(_M_AMD64))
   1001 +#if defined(SSE2_ENABLE) && !(defined(_MSC_VER) && defined(_M_AMD64))
   1002                      /* Microsoft Compiler for x64 doesn't support MMX.
   1003                       * SSE code uses MMX so that we disable on x64 */
   1004  		    } else
   1005 @@ -1256,13 +1332,34 @@ qcms_transform* qcms_transform_create(
   1006  	return transform;
   1007  }
   1008  
   1009 -#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
   1010 +/* __force_align_arg_pointer__ is an x86-only attribute, and gcc/clang warn on unused
   1011 + * attributes. Don't use this on ARM or AMD64. __has_attribute can detect the presence
   1012 + * of the attribute but is currently only supported by clang */
   1013 +#if defined(__has_attribute)
   1014 +#define HAS_FORCE_ALIGN_ARG_POINTER __has_attribute(__force_align_arg_pointer__)
   1015 +#elif defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) && !defined(__arm__) && !defined(__mips__)
   1016 +#define HAS_FORCE_ALIGN_ARG_POINTER 1
   1017 +#else
   1018 +#define HAS_FORCE_ALIGN_ARG_POINTER 0
   1019 +#endif
   1020 +
   1021 +#if HAS_FORCE_ALIGN_ARG_POINTER
   1022  /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
   1023  __attribute__((__force_align_arg_pointer__))
   1024  #endif
   1025  void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
   1026  {
   1027 -	transform->transform_fn(transform, src, dest, length);
   1028 +	static const struct _qcms_format_type output_rgbx = { 0, 2 };
   1029 +
   1030 +	transform->transform_fn(transform, src, dest, length, output_rgbx);
   1031 +}
   1032 +
   1033 +void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type)
   1034 +{
   1035 +	static const struct _qcms_format_type output_rgbx = { 0, 2 };
   1036 +	static const struct _qcms_format_type output_bgrx = { 2, 0 };
   1037 +
   1038 +	transform->transform_fn(transform, src, dest, length, type == QCMS_OUTPUT_BGRX ? output_bgrx : output_rgbx);
   1039  }
   1040  
   1041  qcms_bool qcms_supports_iccv4;
   1042 diff --git a/third_party/qcms/src/transform_util.c b/third_party/qcms/src/transform_util.c
   1043 index e8447e5..f4338b2 100644
   1044 --- a/third_party/qcms/src/transform_util.c
   1045 +++ b/third_party/qcms/src/transform_util.c
   1046 @@ -36,7 +36,7 @@
   1047  
   1048  /* value must be a value between 0 and 1 */
   1049  //XXX: is the above a good restriction to have?
   1050 -float lut_interp_linear(double value, uint16_t *table, int length)
   1051 +float lut_interp_linear(double value, uint16_t *table, size_t length)
   1052  {
   1053  	int upper, lower;
   1054  	value = value * (length - 1); // scale to length of the array
   1055 @@ -49,11 +49,11 @@ float lut_interp_linear(double value, uint16_t *table, int length)
   1056  }
   1057  
   1058  /* same as above but takes and returns a uint16_t value representing a range from 0..1 */
   1059 -uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
   1060 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length)
   1061  {
   1062  	/* Start scaling input_value to the length of the array: 65535*(length-1).
   1063  	 * We'll divide out the 65535 next */
   1064 -	uint32_t value = (input_value * (length - 1));
   1065 +	uintptr_t value = (input_value * (length - 1));
   1066  	uint32_t upper = (value + 65534) / 65535; /* equivalent to ceil(value/65535) */
   1067  	uint32_t lower = value / 65535;           /* equivalent to floor(value/65535) */
   1068  	/* interp is the distance from upper to value scaled to 0..65535 */
   1069 @@ -67,11 +67,11 @@ uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
   1070  /* same as above but takes an input_value from 0..PRECACHE_OUTPUT_MAX
   1071   * and returns a uint8_t value representing a range from 0..1 */
   1072  static
   1073 -uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, int length)
   1074 +uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, size_t length)
   1075  {
   1076  	/* Start scaling input_value to the length of the array: PRECACHE_OUTPUT_MAX*(length-1).
   1077  	 * We'll divide out the PRECACHE_OUTPUT_MAX next */
   1078 -	uint32_t value = (input_value * (length - 1));
   1079 +	uintptr_t value = (input_value * (length - 1));
   1080  
   1081  	/* equivalent to ceil(value/PRECACHE_OUTPUT_MAX) */
   1082  	uint32_t upper = (value + PRECACHE_OUTPUT_MAX-1) / PRECACHE_OUTPUT_MAX;
   1083 @@ -91,7 +91,7 @@ uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table,
   1084  
   1085  /* value must be a value between 0 and 1 */
   1086  //XXX: is the above a good restriction to have?
   1087 -float lut_interp_linear_float(float value, float *table, int length)
   1088 +float lut_interp_linear_float(float value, float *table, size_t length)
   1089  {
   1090          int upper, lower;
   1091          value = value * (length - 1);
   1092 @@ -235,6 +235,21 @@ float u8Fixed8Number_to_float(uint16_t x)
   1093  	return x/256.;
   1094  }
   1095  
   1096 +/* The SSE2 code uses min & max which let NaNs pass through.
   1097 +   We want to try to prevent that here by ensuring that
   1098 +   the gamma table is within expected values. */
   1099 +void validate_gamma_table(float gamma_table[256])
   1100 +{
   1101 +	int i;
   1102 +	for (i = 0; i < 256; i++) {
   1103 +		// Note: we test for "not in range" rather than "out of
   1104 +		// range" so that NaNs, which fail every comparison, are caught
   1105 +		if (!(gamma_table[i] >= 0.f && gamma_table[i] <= 1.f)) {
   1106 +			gamma_table[i] = 0.f;
   1107 +		}
   1108 +	}
   1109 +}
   1110 +
   1111  float *build_input_gamma_table(struct curveType *TRC)
   1112  {
   1113  	float *gamma_table;
   1114 @@ -254,7 +269,10 @@ float *build_input_gamma_table(struct curveType *TRC)
   1115  			}
   1116  		}
   1117  	}
   1118 -        return gamma_table;
   1119 +
   1120 +	validate_gamma_table(gamma_table);
   1121 +
   1122 +	return gamma_table;
   1123  }
   1124  
   1125  struct matrix build_colorant_matrix(qcms_profile *p)
   1126 @@ -390,7 +408,7 @@ uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int len
   1127   which has an maximum error of about 9855 (pixel difference of ~38.346)
   1128  
   1129   For now, we punt the decision of output size to the caller. */
   1130 -static uint16_t *invert_lut(uint16_t *table, int length, int out_length)
   1131 +static uint16_t *invert_lut(uint16_t *table, int length, size_t out_length)
   1132  {
   1133          int i;
   1134          /* for now we invert the lut by creating a lut of size out_length
   1135 diff --git a/third_party/qcms/src/transform_util.h b/third_party/qcms/src/transform_util.h
   1136 index 8f358a8..de465f4 100644
   1137 --- a/third_party/qcms/src/transform_util.h
   1138 +++ b/third_party/qcms/src/transform_util.h
   1139 @@ -31,9 +31,9 @@
   1140  //XXX: could use a bettername
   1141  typedef uint16_t uint16_fract_t;
   1142  
   1143 -float lut_interp_linear(double value, uint16_t *table, int length);
   1144 -float lut_interp_linear_float(float value, float *table, int length);
   1145 -uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length);
   1146 +float lut_interp_linear(double value, uint16_t *table, size_t length);
   1147 +float lut_interp_linear_float(float value, float *table, size_t length);
   1148 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length);
   1149  
   1150  
   1151  static inline float lerp(float a, float b, float t)
   1152