#define COMPONENT_SIZE 8
#define MASK 0xff
#define ONE_HALF 0x80

#define A_SHIFT (8 * 3)
#define R_SHIFT (8 * 2)
#define G_SHIFT 8
#define A_MASK 0xff000000
#define R_MASK 0xff0000
#define G_MASK 0xff00

#define RB_MASK 0xff00ff
#define AG_MASK 0xff00ff00
#define RB_ONE_HALF 0x800080
#define RB_MASK_PLUS_ONE 0x10000100

#define ALPHA_8(x) ((x) >> A_SHIFT)
#define RED_8(x) (((x) >> R_SHIFT) & MASK)
#define GREEN_8(x) (((x) >> G_SHIFT) & MASK)
#define BLUE_8(x) ((x) & MASK)

/*
 * ARMv6 has the UQADD8 instruction, which implements unsigned saturated
 * addition for 8-bit values packed in 32-bit registers. It is very useful
 * for the UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros, which
 * would otherwise need several arithmetic operations to emulate the
 * saturation. Since most of the major ARM Linux distros are built for
 * ARMv7, we depend much less on runtime CPU detection, and conditional
 * compilation here gives practical benefits to a lot of users.
 */

#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \
    !defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__))
#if defined(__ARM_ARCH_6__)   || defined(__ARM_ARCH_6J__)  || \
    defined(__ARM_ARCH_6K__)  || defined(__ARM_ARCH_6Z__)  || \
    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
    defined(__ARM_ARCH_6M__)  || defined(__ARM_ARCH_7__)   || \
    defined(__ARM_ARCH_7A__)  || defined(__ARM_ARCH_7R__)  || \
    defined(__ARM_ARCH_7M__)  || defined(__ARM_ARCH_7EM__)

static force_inline uint32_t
un8x4_add_un8x4 (uint32_t x, uint32_t y)
{
    uint32_t t;
    asm ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y));
    return t;
}

#define UN8x4_ADD_UN8x4(x, y) \
    ((x) = un8x4_add_un8x4 ((x), (y)))

#define UN8_rb_ADD_UN8_rb(x, y, t) \
    ((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t))

#define ADD_UN8(x, y, t) \
    ((t) = (x), un8x4_add_un8x4 ((t), (y)))

#endif
#endif

/*****************************************************************************/

/*
 * Helper macros.
 */

#define MUL_UN8(a, b, t) \
    ((t) = (a) * (uint16_t)(b) + ONE_HALF, ((((t) >> G_SHIFT) + (t)) >> G_SHIFT))

#define DIV_UN8(a, b) \
    (((uint16_t) (a) * MASK + ((b) / 2)) / (b))

#ifndef ADD_UN8
#define ADD_UN8(x, y, t) \
    ((t) = (x) + (y), \
     (uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT))))
#endif

#define DIV_ONE_UN8(x) \
    (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
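/*
 * Illustrative self-test (a hypothetical helper, not part of the original
 * header; assumes force_inline from pixman-private.h, as used above).
 * MUL_UN8 relies on the identity that, for t = a * b + ONE_HALF,
 * (t + (t >> 8)) >> 8 equals the correctly rounded (a * b + 127) / 255
 * for all 8-bit a and b (ties cannot occur, because 255 is odd). The
 * sketch below checks this exhaustively against plain integer division.
 */
static force_inline int
mul_un8_selftest (void)
{
    uint32_t a, b, t;

    for (a = 0; a <= MASK; a++)
    {
        for (b = 0; b <= MASK; b++)
        {
            /* exact round-to-nearest division by 255 */
            uint32_t exact = (a * b + MASK / 2) / MASK;

            if (MUL_UN8 (a, b, t) != exact)
                return 0;
        }
    }
    return 1;
}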
/*
 * The macros below use some tricks to process two color components
 * at the same time.
 */

/*
 * x_rb = (x_rb * a) / 255
 */
#define UN8_rb_MUL_UN8(x, a, t) \
    do \
    { \
        (t) = ((x) & RB_MASK) * (a); \
        (t) += RB_ONE_HALF; \
        (x) = ((t) + (((t) >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
        (x) &= RB_MASK; \
    } while (0)

/*
 * x_rb = min (x_rb + y_rb, 255)
 */
#ifndef UN8_rb_ADD_UN8_rb
#define UN8_rb_ADD_UN8_rb(x, y, t) \
    do \
    { \
        (t) = (x) + (y); \
        (t) |= RB_MASK_PLUS_ONE - (((t) >> G_SHIFT) & RB_MASK); \
        (x) = ((t) & RB_MASK); \
    } while (0)
#endif

/*
 * x_rb = (x_rb * a_rb) / 255
 */
#define UN8_rb_MUL_UN8_rb(x, a, t) \
    do \
    { \
        (t) = ((x) & MASK) * ((a) & MASK); \
        (t) |= ((x) & R_MASK) * (((a) >> R_SHIFT) & MASK); \
        (t) += RB_ONE_HALF; \
        (t) = ((t) + (((t) >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
        (x) = (t) & RB_MASK; \
    } while (0)

/*
 * x_c = (x_c * a) / 255
 */
#define UN8x4_MUL_UN8(x, a) \
    do \
    { \
        uint32_t r1__, r2__, t__; \
        \
        r1__ = (x); \
        UN8_rb_MUL_UN8 (r1__, (a), t__); \
        \
        r2__ = (x) >> G_SHIFT; \
        UN8_rb_MUL_UN8 (r2__, (a), t__); \
        \
        (x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a) / 255 + y_c
 */
#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) \
    do \
    { \
        uint32_t r1__, r2__, r3__, t__; \
        \
        r1__ = (x); \
        r2__ = (y) & RB_MASK; \
        UN8_rb_MUL_UN8 (r1__, (a), t__); \
        UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
        \
        r2__ = (x) >> G_SHIFT; \
        r3__ = ((y) >> G_SHIFT) & RB_MASK; \
        UN8_rb_MUL_UN8 (r2__, (a), t__); \
        UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
        \
        (x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a + y_c * b) / 255
 */
#define UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(x, a, y, b) \
    do \
    { \
        uint32_t r1__, r2__, r3__, t__; \
        \
        r1__ = (x); \
        r2__ = (y); \
        UN8_rb_MUL_UN8 (r1__, (a), t__); \
        UN8_rb_MUL_UN8 (r2__, (b), t__); \
        UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
        \
        r2__ = ((x) >> G_SHIFT); \
        r3__ = ((y) >> G_SHIFT); \
        UN8_rb_MUL_UN8 (r2__, (a), t__); \
        UN8_rb_MUL_UN8 (r3__, (b), t__); \
        UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
        \
        (x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a_c) / 255
 */
#define UN8x4_MUL_UN8x4(x, a) \
    do \
    { \
        uint32_t r1__, r2__, r3__, t__; \
        \
        r1__ = (x); \
        r2__ = (a); \
        UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \
        \
        r2__ = (x) >> G_SHIFT; \
        r3__ = (a) >> G_SHIFT; \
        UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \
        \
        (x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a_c) / 255 + y_c
 */
#define UN8x4_MUL_UN8x4_ADD_UN8x4(x, a, y) \
    do \
    { \
        uint32_t r1__, r2__, r3__, t__; \
        \
        r1__ = (x); \
        r2__ = (a); \
        UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \
        r2__ = (y) & RB_MASK; \
        UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
        \
        r2__ = ((x) >> G_SHIFT); \
        r3__ = ((a) >> G_SHIFT); \
        UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \
        r3__ = ((y) >> G_SHIFT) & RB_MASK; \
        UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
        \
        (x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a_c + y_c * b) / 255
 */
#define UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8(x, a, y, b) \
    do \
    { \
        uint32_t r1__, r2__, r3__, t__; \
        \
        r1__ = (x); \
        r2__ = (a); \
        UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \
        r2__ = (y); \
        UN8_rb_MUL_UN8 (r2__, (b), t__); \
        UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
        \
        r2__ = (x) >> G_SHIFT; \
        r3__ = (a) >> G_SHIFT; \
        UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \
        r3__ = (y) >> G_SHIFT; \
        UN8_rb_MUL_UN8 (r3__, (b), t__); \
        UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
        \
        (x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)
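/*
 * Cross-check sketch (a hypothetical helper, not part of the original
 * header): each UN8x4 macro above is equivalent to applying the
 * single-component helpers to the four channels separately. For example,
 * UN8x4_MUL_UN8 matches this straightforward per-channel reference, which
 * can be used to validate the packed two-components-at-a-time version.
 */
static force_inline uint32_t
un8x4_mul_un8_ref (uint32_t x, uint32_t a)
{
    uint32_t t;
    uint32_t a8 = MUL_UN8 (ALPHA_8 (x), a, t);
    uint32_t r8 = MUL_UN8 (RED_8 (x), a, t);
    uint32_t g8 = MUL_UN8 (GREEN_8 (x), a, t);
    uint32_t b8 = MUL_UN8 (BLUE_8 (x), a, t);

    /* repack the four scaled components */
    return (a8 << A_SHIFT) | (r8 << R_SHIFT) | (g8 << G_SHIFT) | b8;
}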
/*
 * x_c = min (x_c + y_c, 255)
 */
#ifndef UN8x4_ADD_UN8x4
#define UN8x4_ADD_UN8x4(x, y) \
    do \
    { \
        uint32_t r1__, r2__, r3__, t__; \
        \
        r1__ = (x) & RB_MASK; \
        r2__ = (y) & RB_MASK; \
        UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
        \
        r2__ = ((x) >> G_SHIFT) & RB_MASK; \
        r3__ = ((y) >> G_SHIFT) & RB_MASK; \
        UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
        \
        (x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)
#endif
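/*
 * Usage sketch (a hypothetical helper, not part of the original header):
 * with premultiplied alpha, the Porter-Duff OVER operator is
 * dest = src + dest * (255 - alpha (src)) / 255 per component, which
 * falls out of the macros above directly.
 */
static force_inline uint32_t
over_un8x4 (uint32_t src, uint32_t dest)
{
    uint32_t ia = MASK - ALPHA_8 (src); /* inverse source alpha */

    /* dest = dest * ia / 255 + src, per component, with saturation */
    UN8x4_MUL_UN8_ADD_UN8x4 (dest, ia, src);
    return dest;
}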