Home | History | Annotate | Download | only in qcms
      1 diff --git a/third_party/qcms/src/iccread.c b/third_party/qcms/src/iccread.c
      2 index 36b7011..d3c3dfe 100644
      3 --- a/third_party/qcms/src/iccread.c
      4 +++ b/third_party/qcms/src/iccread.c
      5 @@ -266,7 +266,7 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
      6         if (profile->color_space != RGB_SIGNATURE)
      7  	       return false;
      8  
      9 -       if (profile->A2B0 || profile->B2A0)
     10 +       if (qcms_supports_iccv4 && (profile->A2B0 || profile->B2A0))
     11                 return false;
     12  
     13         rX = s15Fixed16Number_to_float(profile->redColorant.X);
     14 @@ -297,6 +297,11 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
     15         sum[1] = rY + gY + bY;
     16         sum[2] = rZ + gZ + bZ;
     17  
     18 +#if defined (_MSC_VER)
     19 +#pragma warning(push)
     20 +/* Disable double to float truncation warning 4305 */
     21 +#pragma warning(disable:4305)
     22 +#endif
     23         // Build our target vector (see mozilla bug 460629)
     24         target[0] = 0.96420;
     25         target[1] = 1.00000;
     26 @@ -310,6 +315,10 @@ qcms_bool qcms_profile_is_bogus(qcms_profile *profile)
     27         tolerance[1] = 0.02;
     28         tolerance[2] = 0.04;
     29  
     30 +#if defined (_MSC_VER)
     31 +/* Restore warnings */
     32 +#pragma warning(pop)
     33 +#endif
     34         // Compare with our tolerance
     35         for (i = 0; i < 3; ++i) {
     36             if (!(((sum[i] - tolerance[i]) <= target[i]) &&
     37 @@ -402,7 +411,7 @@ static struct XYZNumber read_tag_XYZType(struct mem_source *src, struct tag_inde
     38  // present that are not part of the tag_index.
     39  static struct curveType *read_curveType(struct mem_source *src, uint32_t offset, uint32_t *len)
     40  {
     41 -	static const size_t COUNT_TO_LENGTH[5] = {1, 3, 4, 5, 7};
     42 +	static const uint32_t COUNT_TO_LENGTH[5] = {1, 3, 4, 5, 7};
     43  	struct curveType *curve = NULL;
     44  	uint32_t type = read_u32(src, offset);
     45  	uint32_t count;
     46 @@ -657,7 +666,7 @@ static struct lutType *read_tag_lutType(struct mem_source *src, struct tag_index
     47  	uint16_t num_input_table_entries;
     48  	uint16_t num_output_table_entries;
     49  	uint8_t in_chan, grid_points, out_chan;
     50 -	uint32_t clut_offset, output_offset;
     51 +	size_t clut_offset, output_offset;
     52  	uint32_t clut_size;
     53  	size_t entry_size;
     54  	struct lutType *lut;
     55 diff --git a/third_party/qcms/src/qcms.h b/third_party/qcms/src/qcms.h
     56 index 7d83623..1e3e125 100644
     57 --- a/third_party/qcms/src/qcms.h
     58 +++ b/third_party/qcms/src/qcms.h
     59 @@ -102,6 +102,12 @@ typedef enum {
     60  	QCMS_DATA_GRAYA_8
     61  } qcms_data_type;
     62  
     63 +/* Format of the output data for qcms_transform_data_type() */
     64 +typedef enum {
     65 +	QCMS_OUTPUT_RGBX, /* NOTE(review): presumably red-first channel order; confirm in qcms_transform_data_type() */
     66 +	QCMS_OUTPUT_BGRX  /* NOTE(review): presumably blue-first (red/blue swapped) channel order; confirm as above */
     67 +} qcms_output_type;
     68 +
     69  /* the names for the following two types are sort of ugly */
     70  typedef struct
     71  {
     72 @@ -146,6 +152,7 @@ qcms_transform* qcms_transform_create(
     73  void qcms_transform_release(qcms_transform *);
     74  
     75  void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length);
     76 +void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type);
     77  
     78  void qcms_enable_iccv4();
     79  
     80 diff --git a/third_party/qcms/src/qcmsint.h b/third_party/qcms/src/qcmsint.h
     81 index 53a3420..63905de 100644
     82 --- a/third_party/qcms/src/qcmsint.h
     83 +++ b/third_party/qcms/src/qcmsint.h
     84 @@ -45,6 +45,11 @@ struct precache_output
     85  #define ALIGN __attribute__(( aligned (16) ))
     86  #endif
     87  
     88 +typedef struct _qcms_format_type { /* where the red and blue values go inside one output pixel */
     89 +	int r; /* index into dest[] at which the transform functions store the red value */
     90 +	int b; /* index into dest[] for the blue value; the transform functions write green to dest[1] */
     91 +} qcms_format_type;
     92 +
     93  struct _qcms_transform {
     94  	float ALIGN matrix[3][4];
     95  	float *input_gamma_table_r;
     96 @@ -88,7 +93,7 @@ struct _qcms_transform {
     97  	struct precache_output *output_table_g;
     98  	struct precache_output *output_table_b;
     99  
    100 -	void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length);
    101 +	void (*transform_fn)(struct _qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, struct _qcms_format_type output_format);
    102  };
    103  
    104  struct matrix {
    105 @@ -280,18 +285,22 @@ qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm
    106  void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    107                                            unsigned char *src,
    108                                            unsigned char *dest,
    109 -                                          size_t length);
    110 +                                          size_t length,
    111 +                                          qcms_format_type output_format);
    112  void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    113                                            unsigned char *src,
    114                                            unsigned char *dest,
    115 -                                          size_t length);
    116 +                                          size_t length,
    117 +                                          qcms_format_type output_format);
    118  void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    119                                            unsigned char *src,
    120                                            unsigned char *dest,
    121 -                                          size_t length);
    122 +                                          size_t length,
    123 +                                          qcms_format_type output_format);
    124  void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    125                                            unsigned char *src,
    126                                            unsigned char *dest,
    127 -                                          size_t length);
    128 +                                          size_t length,
    129 +                                          qcms_format_type output_format);
    130  
    131  extern qcms_bool qcms_supports_iccv4;
    132 diff --git a/third_party/qcms/src/qcmstypes.h b/third_party/qcms/src/qcmstypes.h
    133 index 56d8de3..9a9b197 100644
    134 --- a/third_party/qcms/src/qcmstypes.h
    135 +++ b/third_party/qcms/src/qcmstypes.h
    136 @@ -87,7 +87,12 @@ typedef unsigned __int64 uint64_t;
    137  #ifdef _WIN64
    138  typedef unsigned __int64 uintptr_t;
    139  #else
    140 +#pragma warning(push)
    141 +/* Disable benign redefinition of type warning 4142 */
    142 +#pragma warning(disable:4142)
    143  typedef unsigned long uintptr_t;
    144 +/* Restore warnings */
    145 +#pragma warning(pop)
    146  #endif
    147  
    148  #elif defined (_AIX)
    149 diff --git a/third_party/qcms/src/transform-sse1.c b/third_party/qcms/src/transform-sse1.c
    150 index 2f34db5..aaee1bf 100644
    151 --- a/third_party/qcms/src/transform-sse1.c
    152 +++ b/third_party/qcms/src/transform-sse1.c
    153 @@ -34,7 +34,8 @@ static const ALIGN float clampMaxValueX4[4] =
    154  void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    155                                            unsigned char *src,
    156                                            unsigned char *dest,
    157 -                                          size_t length)
    158 +                                          size_t length,
    159 +                                          qcms_format_type output_format)
    160  {
    161      unsigned int i;
    162      float (*mat)[4] = transform->matrix;
    163 @@ -70,6 +71,8 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    164  
    165      /* working variables */
    166      __m128 vec_r, vec_g, vec_b, result;
    167 +    const int r_out = output_format.r;
    168 +    const int b_out = output_format.b;
    169  
    170      /* CYA */
    171      if (!length)
    172 @@ -116,9 +119,9 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    173          src += 3;
    174  
    175          /* use calc'd indices to output RGB values */
    176 -        dest[0] = otdata_r[output[0]];
    177 -        dest[1] = otdata_g[output[1]];
    178 -        dest[2] = otdata_b[output[2]];
    179 +        dest[r_out] = otdata_r[output[0]];
    180 +        dest[1]     = otdata_g[output[1]];
    181 +        dest[b_out] = otdata_b[output[2]];
    182          dest += 3;
    183      }
    184  
    185 @@ -141,9 +144,9 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    186      result = _mm_movehl_ps(result, result);
    187      *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
    188  
    189 -    dest[0] = otdata_r[output[0]];
    190 -    dest[1] = otdata_g[output[1]];
    191 -    dest[2] = otdata_b[output[2]];
    192 +    dest[r_out] = otdata_r[output[0]];
    193 +    dest[1]     = otdata_g[output[1]];
    194 +    dest[b_out] = otdata_b[output[2]];
    195  
    196      _mm_empty();
    197  }
    198 @@ -151,7 +154,8 @@ void qcms_transform_data_rgb_out_lut_sse1(qcms_transform *transform,
    199  void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    200                                             unsigned char *src,
    201                                             unsigned char *dest,
    202 -                                           size_t length)
    203 +                                           size_t length,
    204 +                                           qcms_format_type output_format)
    205  {
    206      unsigned int i;
    207      float (*mat)[4] = transform->matrix;
    208 @@ -187,6 +191,8 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    209  
    210      /* working variables */
    211      __m128 vec_r, vec_g, vec_b, result;
    212 +    const int r_out = output_format.r;
    213 +    const int b_out = output_format.b;
    214      unsigned char alpha;
    215  
    216      /* CYA */
    217 @@ -239,9 +245,9 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    218          src += 4;
    219  
    220          /* use calc'd indices to output RGB values */
    221 -        dest[0] = otdata_r[output[0]];
    222 -        dest[1] = otdata_g[output[1]];
    223 -        dest[2] = otdata_b[output[2]];
    224 +        dest[r_out] = otdata_r[output[0]];
    225 +        dest[1]     = otdata_g[output[1]];
    226 +        dest[b_out] = otdata_b[output[2]];
    227          dest += 4;
    228      }
    229  
    230 @@ -266,9 +272,9 @@ void qcms_transform_data_rgba_out_lut_sse1(qcms_transform *transform,
    231      result = _mm_movehl_ps(result, result);
    232      *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
    233  
    234 -    dest[0] = otdata_r[output[0]];
    235 -    dest[1] = otdata_g[output[1]];
    236 -    dest[2] = otdata_b[output[2]];
    237 +    dest[r_out] = otdata_r[output[0]];
    238 +    dest[1]     = otdata_g[output[1]];
    239 +    dest[b_out] = otdata_b[output[2]];
    240  
    241      _mm_empty();
    242  }
    243 diff --git a/third_party/qcms/src/transform-sse2.c b/third_party/qcms/src/transform-sse2.c
    244 index 6a5faf9..fa7f2d1 100644
    245 --- a/third_party/qcms/src/transform-sse2.c
    246 +++ b/third_party/qcms/src/transform-sse2.c
    247 @@ -34,7 +34,8 @@ static const ALIGN float clampMaxValueX4[4] =
    248  void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    249                                            unsigned char *src,
    250                                            unsigned char *dest,
    251 -                                          size_t length)
    252 +                                          size_t length,
    253 +                                          qcms_format_type output_format)
    254  {
    255      unsigned int i;
    256      float (*mat)[4] = transform->matrix;
    257 @@ -70,6 +71,8 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    258  
    259      /* working variables */
    260      __m128 vec_r, vec_g, vec_b, result;
    261 +    const int r_out = output_format.r;
    262 +    const int b_out = output_format.b;
    263  
    264      /* CYA */
    265      if (!length)
    266 @@ -114,9 +117,9 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    267          src += 3;
    268  
    269          /* use calc'd indices to output RGB values */
    270 -        dest[0] = otdata_r[output[0]];
    271 -        dest[1] = otdata_g[output[1]];
    272 -        dest[2] = otdata_b[output[2]];
    273 +        dest[r_out] = otdata_r[output[0]];
    274 +        dest[1]     = otdata_g[output[1]];
    275 +        dest[b_out] = otdata_b[output[2]];
    276          dest += 3;
    277      }
    278  
    279 @@ -137,15 +140,16 @@ void qcms_transform_data_rgb_out_lut_sse2(qcms_transform *transform,
    280  
    281      _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
    282  
    283 -    dest[0] = otdata_r[output[0]];
    284 -    dest[1] = otdata_g[output[1]];
    285 -    dest[2] = otdata_b[output[2]];
    286 +    dest[r_out] = otdata_r[output[0]];
    287 +    dest[1]     = otdata_g[output[1]];
    288 +    dest[b_out] = otdata_b[output[2]];
    289  }
    290  
    291  void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    292                                             unsigned char *src,
    293                                             unsigned char *dest,
    294 -                                           size_t length)
    295 +                                           size_t length,
    296 +                                           qcms_format_type output_format)
    297  {
    298      unsigned int i;
    299      float (*mat)[4] = transform->matrix;
    300 @@ -181,6 +185,8 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    301  
    302      /* working variables */
    303      __m128 vec_r, vec_g, vec_b, result;
    304 +    const int r_out = output_format.r;
    305 +    const int b_out = output_format.b;
    306      unsigned char alpha;
    307  
    308      /* CYA */
    309 @@ -231,9 +237,9 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    310          src += 4;
    311  
    312          /* use calc'd indices to output RGB values */
    313 -        dest[0] = otdata_r[output[0]];
    314 -        dest[1] = otdata_g[output[1]];
    315 -        dest[2] = otdata_b[output[2]];
    316 +        dest[r_out] = otdata_r[output[0]];
    317 +        dest[1]     = otdata_g[output[1]];
    318 +        dest[b_out] = otdata_b[output[2]];
    319          dest += 4;
    320      }
    321  
    322 @@ -256,7 +262,7 @@ void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
    323  
    324      _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
    325  
    326 -    dest[0] = otdata_r[output[0]];
    327 -    dest[1] = otdata_g[output[1]];
    328 -    dest[2] = otdata_b[output[2]];
    329 +    dest[r_out] = otdata_r[output[0]];
    330 +    dest[1]     = otdata_g[output[1]];
    331 +    dest[b_out] = otdata_b[output[2]];
    332  }
    333 diff --git a/third_party/qcms/src/transform.c b/third_party/qcms/src/transform.c
    334 index 9a6562b..7312ced 100644
    335 --- a/third_party/qcms/src/transform.c
    336 +++ b/third_party/qcms/src/transform.c
    337 @@ -181,11 +181,20 @@ compute_chromatic_adaption(struct CIE_XYZ source_white_point,
    338  static struct matrix
    339  adaption_matrix(struct CIE_XYZ source_illumination, struct CIE_XYZ target_illumination)
    340  {
    341 +#if defined (_MSC_VER)
    342 +#pragma warning(push)
    343 +/* Disable double to float truncation warning 4305: the Bradford coefficients below are unsuffixed (double) literals */
    344 +#pragma warning(disable:4305)
    345 +#endif
    346  	struct matrix lam_rigg = {{ // Bradford matrix
    347  	                         {  0.8951,  0.2664, -0.1614 },
    348  	                         { -0.7502,  1.7135,  0.0367 },
    349  	                         {  0.0389, -0.0685,  1.0296 }
    350  	                         }};
    351 +#if defined (_MSC_VER)
    352 +/* Restore the warning state saved by the push above */
    353 +#pragma warning(pop)
    354 +#endif
    355  	return compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg);
    356  }
    357  
    358 @@ -230,8 +239,11 @@ qcms_bool set_rgb_colorants(qcms_profile *profile, qcms_CIE_xyY white_point, qcm
    359  }
    360  
    361  #if 0
    362 -static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    363 +static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    364  {
    365 +	const int r_out = output_format.r;
    366 +	const int b_out = output_format.b;
    367 +
    368  	int i;
    369  	float (*mat)[4] = transform->matrix;
    370  	for (i=0; i<length; i++) {
    371 @@ -251,15 +263,19 @@ static void qcms_transform_data_rgb_out_pow(qcms_transform *transform, unsigned
    372  		float out_device_g = pow(out_linear_g, transform->out_gamma_g);
    373  		float out_device_b = pow(out_linear_b, transform->out_gamma_b);
    374  
    375 -		*dest++ = clamp_u8(255*out_device_r);
    376 -		*dest++ = clamp_u8(255*out_device_g);
    377 -		*dest++ = clamp_u8(255*out_device_b);
    378 +		dest[r_out] = clamp_u8(out_device_r*255);
    379 +		dest[1]     = clamp_u8(out_device_g*255);
    380 +		dest[b_out] = clamp_u8(out_device_b*255);
    381 +		dest += 3;
    382  	}
    383  }
    384  #endif
    385  
    386 -static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    387 +static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    388  {
    389 +	const int r_out = output_format.r;
    390 +	const int b_out = output_format.b;
    391 +
    392  	unsigned int i;
    393  	for (i = 0; i < length; i++) {
    394  		float out_device_r, out_device_g, out_device_b;
    395 @@ -267,13 +283,14 @@ static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned
    396  
    397  		float linear = transform->input_gamma_table_gray[device];
    398  
    399 -                out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
    400 +		out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
    401  		out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
    402  		out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
    403  
    404 -		*dest++ = clamp_u8(out_device_r*255);
    405 -		*dest++ = clamp_u8(out_device_g*255);
    406 -		*dest++ = clamp_u8(out_device_b*255);
    407 +		dest[r_out] = clamp_u8(out_device_r*255);
    408 +		dest[1]     = clamp_u8(out_device_g*255);
    409 +		dest[b_out] = clamp_u8(out_device_b*255);
    410 +		dest += 3;
    411  	}
    412  }
    413  
    414 @@ -283,8 +300,11 @@ static void qcms_transform_data_gray_out_lut(qcms_transform *transform, unsigned
    415  	See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
    416  */
    417  
    418 -static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    419 +static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    420  {
    421 +	const int r_out = output_format.r;
    422 +	const int b_out = output_format.b;
    423 +
    424  	unsigned int i;
    425  	for (i = 0; i < length; i++) {
    426  		float out_device_r, out_device_g, out_device_b;
    427 @@ -293,20 +313,24 @@ static void qcms_transform_data_graya_out_lut(qcms_transform *transform, unsigne
    428  
    429  		float linear = transform->input_gamma_table_gray[device];
    430  
    431 -                out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
    432 +		out_device_r = lut_interp_linear(linear, transform->output_gamma_lut_r, transform->output_gamma_lut_r_length);
    433  		out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
    434  		out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
    435  
    436 -		*dest++ = clamp_u8(out_device_r*255);
    437 -		*dest++ = clamp_u8(out_device_g*255);
    438 -		*dest++ = clamp_u8(out_device_b*255);
    439 -		*dest++ = alpha;
    440 +		dest[r_out] = clamp_u8(out_device_r*255);
    441 +		dest[1]     = clamp_u8(out_device_g*255);
    442 +		dest[b_out] = clamp_u8(out_device_b*255);
    443 +		dest[3]     = alpha;
    444 +		dest += 4;
    445  	}
    446  }
    447  
    448  
    449 -static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    450 +static void qcms_transform_data_gray_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    451  {
    452 +	const int r_out = output_format.r;
    453 +	const int b_out = output_format.b;
    454 +
    455  	unsigned int i;
    456  	for (i = 0; i < length; i++) {
    457  		unsigned char device = *src++;
    458 @@ -317,14 +341,19 @@ static void qcms_transform_data_gray_out_precache(qcms_transform *transform, uns
    459  		/* we could round here... */
    460  		gray = linear * PRECACHE_OUTPUT_MAX;
    461  
    462 -		*dest++ = transform->output_table_r->data[gray];
    463 -		*dest++ = transform->output_table_g->data[gray];
    464 -		*dest++ = transform->output_table_b->data[gray];
    465 +		dest[r_out] = transform->output_table_r->data[gray];
    466 +		dest[1]     = transform->output_table_g->data[gray];
    467 +		dest[b_out] = transform->output_table_b->data[gray];
    468 +		dest += 3;
    469  	}
    470  }
    471  
    472 -static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    473 +
    474 +static void qcms_transform_data_graya_out_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    475  {
    476 +	const int r_out = output_format.r;
    477 +	const int b_out = output_format.b;
    478 +
    479  	unsigned int i;
    480  	for (i = 0; i < length; i++) {
    481  		unsigned char device = *src++;
    482 @@ -336,15 +365,19 @@ static void qcms_transform_data_graya_out_precache(qcms_transform *transform, un
    483  		/* we could round here... */
    484  		gray = linear * PRECACHE_OUTPUT_MAX;
    485  
    486 -		*dest++ = transform->output_table_r->data[gray];
    487 -		*dest++ = transform->output_table_g->data[gray];
    488 -		*dest++ = transform->output_table_b->data[gray];
    489 -		*dest++ = alpha;
    490 +		dest[r_out] = transform->output_table_r->data[gray];
    491 +		dest[1]     = transform->output_table_g->data[gray];
    492 +		dest[b_out] = transform->output_table_b->data[gray];
    493 +		dest[3]     = alpha;
    494 +		dest += 4;
    495  	}
    496  }
    497  
    498 -static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    499 +static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    500  {
    501 +	const int r_out = output_format.r;
    502 +	const int b_out = output_format.b;
    503 +
    504  	unsigned int i;
    505  	float (*mat)[4] = transform->matrix;
    506  	for (i = 0; i < length; i++) {
    507 @@ -370,14 +403,18 @@ static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform,
    508  		g = out_linear_g * PRECACHE_OUTPUT_MAX;
    509  		b = out_linear_b * PRECACHE_OUTPUT_MAX;
    510  
    511 -		*dest++ = transform->output_table_r->data[r];
    512 -		*dest++ = transform->output_table_g->data[g];
    513 -		*dest++ = transform->output_table_b->data[b];
    514 +		dest[r_out] = transform->output_table_r->data[r];
    515 +		dest[1]     = transform->output_table_g->data[g];
    516 +		dest[b_out] = transform->output_table_b->data[b];
    517 +		dest += 3;
    518  	}
    519  }
    520  
    521 -static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    522 +static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    523  {
    524 +	const int r_out = output_format.r;
    525 +	const int b_out = output_format.b;
    526 +
    527  	unsigned int i;
    528  	float (*mat)[4] = transform->matrix;
    529  	for (i = 0; i < length; i++) {
    530 @@ -404,16 +441,21 @@ static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform,
    531  		g = out_linear_g * PRECACHE_OUTPUT_MAX;
    532  		b = out_linear_b * PRECACHE_OUTPUT_MAX;
    533  
    534 -		*dest++ = transform->output_table_r->data[r];
    535 -		*dest++ = transform->output_table_g->data[g];
    536 -		*dest++ = transform->output_table_b->data[b];
    537 -		*dest++ = alpha;
    538 +		dest[r_out] = transform->output_table_r->data[r];
    539 +		dest[1]     = transform->output_table_g->data[g];
    540 +		dest[b_out] = transform->output_table_b->data[b];
    541 +		dest[3]     = alpha;
    542 +		dest += 4;
    543  	}
    544  }
    545  
    546  // Not used
    547  /* 
    548 -static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
    549 +static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    550 +{
    551 +	const int r_out = output_format.r;
    552 +	const int b_out = output_format.b;
    553 +
    554  	unsigned int i;
    555  	int xy_len = 1;
    556  	int x_len = transform->grid_size;
    557 @@ -462,15 +504,20 @@ static void qcms_transform_data_clut(qcms_transform *transform, unsigned char *s
    558  		float b_y2 = lerp(b_x3, b_x4, y_d);
    559  		float clut_b = lerp(b_y1, b_y2, z_d);
    560  
    561 -		*dest++ = clamp_u8(clut_r*255.0f);
    562 -		*dest++ = clamp_u8(clut_g*255.0f);
    563 -		*dest++ = clamp_u8(clut_b*255.0f);
    564 -	}	
    565 +		dest[r_out] = clamp_u8(clut_r*255.0f);
    566 +		dest[1]     = clamp_u8(clut_g*255.0f);
    567 +		dest[b_out] = clamp_u8(clut_b*255.0f);
    568 +		dest += 3;
    569 +	}
    570  }
    571  */
    572  
    573  // Using lcms' tetra interpolation algorithm.
    574 -static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
    575 +static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    576 +{
    577 +	const int r_out = output_format.r;
    578 +	const int b_out = output_format.b;
    579 +
    580  	unsigned int i;
    581  	int xy_len = 1;
    582  	int x_len = transform->grid_size;
    583 @@ -577,15 +624,20 @@ static void qcms_transform_data_tetra_clut_rgba(qcms_transform *transform, unsig
    584  		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
    585  		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
    586  
    587 -		*dest++ = clamp_u8(clut_r*255.0f);
    588 -		*dest++ = clamp_u8(clut_g*255.0f);
    589 -		*dest++ = clamp_u8(clut_b*255.0f);
    590 -		*dest++ = in_a;
    591 -	}	
    592 +		dest[r_out] = clamp_u8(clut_r*255.0f);
    593 +		dest[1]     = clamp_u8(clut_g*255.0f);
    594 +		dest[b_out] = clamp_u8(clut_b*255.0f);
    595 +		dest[3]     = in_a;
    596 +		dest += 4;
    597 +	}
    598  }
    599  
    600  // Using lcms' tetra interpolation code.
    601 -static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length) {
    602 +static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    603 +{
    604 +	const int r_out = output_format.r;
    605 +	const int b_out = output_format.b;
    606 +
    607  	unsigned int i;
    608  	int xy_len = 1;
    609  	int x_len = transform->grid_size;
    610 @@ -691,14 +743,18 @@ static void qcms_transform_data_tetra_clut(qcms_transform *transform, unsigned c
    611  		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
    612  		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
    613  
    614 -		*dest++ = clamp_u8(clut_r*255.0f);
    615 -		*dest++ = clamp_u8(clut_g*255.0f);
    616 -		*dest++ = clamp_u8(clut_b*255.0f);
    617 -	}	
    618 +		dest[r_out] = clamp_u8(clut_r*255.0f);
    619 +		dest[1]     = clamp_u8(clut_g*255.0f);
    620 +		dest[b_out] = clamp_u8(clut_b*255.0f);
    621 +		dest += 3;
    622 +	}
    623  }
    624  
    625 -static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    626 +static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    627  {
    628 +	const int r_out = output_format.r;
    629 +	const int b_out = output_format.b;
    630 +
    631  	unsigned int i;
    632  	float (*mat)[4] = transform->matrix;
    633  	for (i = 0; i < length; i++) {
    634 @@ -726,14 +782,18 @@ static void qcms_transform_data_rgb_out_lut(qcms_transform *transform, unsigned
    635  		out_device_b = lut_interp_linear(out_linear_b, 
    636  				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
    637  
    638 -		*dest++ = clamp_u8(out_device_r*255);
    639 -		*dest++ = clamp_u8(out_device_g*255);
    640 -		*dest++ = clamp_u8(out_device_b*255);
    641 +		dest[r_out] = clamp_u8(out_device_r*255);
    642 +		dest[1]     = clamp_u8(out_device_g*255);
    643 +		dest[b_out] = clamp_u8(out_device_b*255);
    644 +		dest += 3;
    645  	}
    646  }
    647  
    648 -static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    649 +static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    650  {
    651 +	const int r_out = output_format.r;
    652 +	const int b_out = output_format.b;
    653 +
    654  	unsigned int i;
    655  	float (*mat)[4] = transform->matrix;
    656  	for (i = 0; i < length; i++) {
    657 @@ -762,16 +822,20 @@ static void qcms_transform_data_rgba_out_lut(qcms_transform *transform, unsigned
    658  		out_device_b = lut_interp_linear(out_linear_b, 
    659  				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
    660  
    661 -		*dest++ = clamp_u8(out_device_r*255);
    662 -		*dest++ = clamp_u8(out_device_g*255);
    663 -		*dest++ = clamp_u8(out_device_b*255);
    664 -		*dest++ = alpha;
    665 +		dest[r_out] = clamp_u8(out_device_r*255);
    666 +		dest[1]     = clamp_u8(out_device_g*255);
    667 +		dest[b_out] = clamp_u8(out_device_b*255);
    668 +		dest[3]     = alpha;
    669 +		dest += 4;
    670  	}
    671  }
    672  
    673  #if 0
    674 -static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
    675 +static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length, qcms_format_type output_format)
    676  {
    677 +	const int r_out = output_format.r;
    678 +	const int b_out = output_format.b;
    679 +
    680  	int i;
    681  	float (*mat)[4] = transform->matrix;
    682  	for (i = 0; i < length; i++) {
    683 @@ -787,9 +851,10 @@ static void qcms_transform_data_rgb_out_linear(qcms_transform *transform, unsign
    684  		float out_linear_g = mat[0][1]*linear_r + mat[1][1]*linear_g + mat[2][1]*linear_b;
    685  		float out_linear_b = mat[0][2]*linear_r + mat[1][2]*linear_g + mat[2][2]*linear_b;
    686  
    687 -		*dest++ = clamp_u8(out_linear_r*255);
    688 -		*dest++ = clamp_u8(out_linear_g*255);
    689 -		*dest++ = clamp_u8(out_linear_b*255);
    690 +		dest[r_out] = clamp_u8(out_linear_r*255);
    691 +		dest[1]     = clamp_u8(out_linear_g*255);
    692 +		dest[b_out] = clamp_u8(out_linear_b*255);
    693 +		dest += 3;
    694  	}
    695  }
    696  #endif
    697 @@ -815,7 +880,7 @@ void precache_release(struct precache_output *p)
    698  	}
    699  }
    700  
    701 -#ifdef HAS_POSIX_MEMALIGN
    702 +#ifdef HAVE_POSIX_MEMALIGN
    703  static qcms_transform *transform_alloc(void)
    704  {
    705  	qcms_transform *t;
    706 @@ -994,13 +1059,15 @@ void qcms_profile_precache_output_transform(qcms_profile *profile)
    707  	if (profile->color_space != RGB_SIGNATURE)
    708  		return;
    709  
    710 -	/* don't precache since we will use the B2A LUT */
    711 -	if (profile->B2A0)
    712 -		return;
    713 +	if (qcms_supports_iccv4) {
    714 +		/* don't precache since we will use the B2A LUT */
    715 +		if (profile->B2A0)
    716 +			return;
    717  
    718 -	/* don't precache since we will use the mBA LUT */
    719 -	if (profile->mBA)
    720 -		return;
    721 +		/* don't precache since we will use the mBA LUT */
    722 +		if (profile->mBA)
    723 +			return;
    724 +	}
    725  
    726  	/* don't precache if we do not have the TRC curves */
    727  	if (!profile->redTRC || !profile->greenTRC || !profile->blueTRC)
    728 @@ -1157,14 +1224,14 @@ qcms_transform* qcms_transform_create(
    729                  	return NULL;
    730              	}
    731  		if (precache) {
    732 -#ifdef X86
    733 +#if defined(SSE2_ENABLE) && defined(X86)
    734  		    if (sse_version_available() >= 2) {
    735  			    if (in_type == QCMS_DATA_RGB_8)
    736  				    transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
    737  			    else
    738  				    transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;
    739  
    740 -#if !(defined(_MSC_VER) && defined(_M_AMD64))
    741 +#if defined(SSE2_ENABLE) && !(defined(_MSC_VER) && defined(_M_AMD64))
    742                      /* Microsoft Compiler for x64 doesn't support MMX.
    743                       * SSE code uses MMX so that we disable on x64 */
    744  		    } else
    745 @@ -1256,13 +1323,34 @@ qcms_transform* qcms_transform_create(
    746  	return transform;
    747  }
    748  
    749 -#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
    750 +/* __force_align_arg_pointer__ is an x86-only attribute, and gcc/clang warns on unused
    751 + * attributes. Don't use this on ARM, AMD64 or MIPS. __has_attribute can detect the presence
    752 + * of the attribute but is currently only supported by clang */
    753 +#if defined(__has_attribute)
    754 +#define HAS_FORCE_ALIGN_ARG_POINTER __has_attribute(__force_align_arg_pointer__)
    755 +#elif defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__) && !defined(__arm__) && !defined(__mips__)
    756 +#define HAS_FORCE_ALIGN_ARG_POINTER 1
    757 +#else
    758 +#define HAS_FORCE_ALIGN_ARG_POINTER 0
    759 +#endif
    760 +
    761 +#if HAS_FORCE_ALIGN_ARG_POINTER
    762  /* we need this to avoid crashes when gcc assumes the stack is 128bit aligned */
    763  __attribute__((__force_align_arg_pointer__))
    764  #endif
    765  void qcms_transform_data(qcms_transform *transform, void *src, void *dest, size_t length)
    766  {
    767 -	transform->transform_fn(transform, src, dest, length);
    768 +	static const struct _qcms_format_type output_rgbx = { 0, 2 };
    769 +
    770 +	transform->transform_fn(transform, src, dest, length, output_rgbx);
    771 +}
    772 +
    773 +void qcms_transform_data_type(qcms_transform *transform, void *src, void *dest, size_t length, qcms_output_type type)
    774 +{
    775 +	static const struct _qcms_format_type output_rgbx = { 0, 2 };
    776 +	static const struct _qcms_format_type output_bgrx = { 2, 0 };
    777 +
    778 +	transform->transform_fn(transform, src, dest, length, type == QCMS_OUTPUT_BGRX ? output_bgrx : output_rgbx);
    779  }
    780  
    781  qcms_bool qcms_supports_iccv4;
    782 diff --git a/third_party/qcms/src/transform_util.c b/third_party/qcms/src/transform_util.c
    783 index e8447e5..f4338b2 100644
    784 --- a/third_party/qcms/src/transform_util.c
    785 +++ b/third_party/qcms/src/transform_util.c
    786 @@ -36,7 +36,7 @@
    787  
    788  /* value must be a value between 0 and 1 */
    789  //XXX: is the above a good restriction to have?
    790 -float lut_interp_linear(double value, uint16_t *table, int length)
    791 +float lut_interp_linear(double value, uint16_t *table, size_t length)
    792  {
    793  	int upper, lower;
    794  	value = value * (length - 1); // scale to length of the array
    795 @@ -49,11 +49,11 @@ float lut_interp_linear(double value, uint16_t *table, int length)
    796  }
    797  
    798  /* same as above but takes and returns a uint16_t value representing a range from 0..1 */
    799 -uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
    800 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length)
    801  {
    802  	/* Start scaling input_value to the length of the array: 65535*(length-1).
    803  	 * We'll divide out the 65535 next */
    804 -	uint32_t value = (input_value * (length - 1));
    805 +	uintptr_t value = (input_value * (length - 1));
    806  	uint32_t upper = (value + 65534) / 65535; /* equivalent to ceil(value/65535) */
    807  	uint32_t lower = value / 65535;           /* equivalent to floor(value/65535) */
    808  	/* interp is the distance from upper to value scaled to 0..65535 */
    809 @@ -67,11 +67,11 @@ uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length)
    810  /* same as above but takes an input_value from 0..PRECACHE_OUTPUT_MAX
    811   * and returns a uint8_t value representing a range from 0..1 */
    812  static
    813 -uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, int length)
    814 +uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table, size_t length)
    815  {
    816  	/* Start scaling input_value to the length of the array: PRECACHE_OUTPUT_MAX*(length-1).
    817  	 * We'll divide out the PRECACHE_OUTPUT_MAX next */
    818 -	uint32_t value = (input_value * (length - 1));
    819 +	uintptr_t value = (input_value * (length - 1));
    820  
    821  	/* equivalent to ceil(value/PRECACHE_OUTPUT_MAX) */
    822  	uint32_t upper = (value + PRECACHE_OUTPUT_MAX-1) / PRECACHE_OUTPUT_MAX;
    823 @@ -91,7 +91,7 @@ uint8_t lut_interp_linear_precache_output(uint32_t input_value, uint16_t *table,
    824  
    825  /* value must be a value between 0 and 1 */
    826  //XXX: is the above a good restriction to have?
    827 -float lut_interp_linear_float(float value, float *table, int length)
    828 +float lut_interp_linear_float(float value, float *table, size_t length)
    829  {
    830          int upper, lower;
    831          value = value * (length - 1);
    832 @@ -235,6 +235,21 @@ float u8Fixed8Number_to_float(uint16_t x)
    833  	return x/256.;
    834  }
    835  
    836 +/* The SSE2 code uses min & max which let NaNs pass through.
    837 +   We want to try to prevent that here by ensuring that
    838 +   the gamma table is within expected values. */
    839 +void validate_gamma_table(float gamma_table[256])
    840 +{
    841 +	int i;
    842 +	for (i = 0; i < 256; i++) {
    843 +		// Note: we test for "not in range" rather than
    844 +		// "out of range" so that NaNs are caught as well
    845 +		if (!(gamma_table[i] >= 0.f && gamma_table[i] <= 1.f)) {
    846 +			gamma_table[i] = 0.f;
    847 +		}
    848 +	}
    849 +}
    850 +
    851  float *build_input_gamma_table(struct curveType *TRC)
    852  {
    853  	float *gamma_table;
    854 @@ -254,7 +269,10 @@ float *build_input_gamma_table(struct curveType *TRC)
    855  			}
    856  		}
    857  	}
    858 -        return gamma_table;
    859 +
    860 +	validate_gamma_table(gamma_table);
    861 +
    862 +	return gamma_table;
    863  }
    864  
    865  struct matrix build_colorant_matrix(qcms_profile *p)
    866 @@ -390,7 +408,7 @@ uint16_fract_t lut_inverse_interp16(uint16_t Value, uint16_t LutTable[], int len
    867   which has an maximum error of about 9855 (pixel difference of ~38.346)
    868  
    869   For now, we punt the decision of output size to the caller. */
    870 -static uint16_t *invert_lut(uint16_t *table, int length, int out_length)
    871 +static uint16_t *invert_lut(uint16_t *table, int length, size_t out_length)
    872  {
    873          int i;
    874          /* for now we invert the lut by creating a lut of size out_length
    875 diff --git a/third_party/qcms/src/transform_util.h b/third_party/qcms/src/transform_util.h
    876 index 8f358a8..de465f4 100644
    877 --- a/third_party/qcms/src/transform_util.h
    878 +++ b/third_party/qcms/src/transform_util.h
    879 @@ -31,9 +31,9 @@
    880  //XXX: could use a bettername
    881  typedef uint16_t uint16_fract_t;
    882  
    883 -float lut_interp_linear(double value, uint16_t *table, int length);
    884 -float lut_interp_linear_float(float value, float *table, int length);
    885 -uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, int length);
    886 +float lut_interp_linear(double value, uint16_t *table, size_t length);
    887 +float lut_interp_linear_float(float value, float *table, size_t length);
    888 +uint16_t lut_interp_linear16(uint16_t input_value, uint16_t *table, size_t length);
    889  
    890  
    891  static inline float lerp(float a, float b, float t)
    892