1 #include "rs_core.rsh" 2 #include "rs_graphics.rsh" 3 #include "rs_structs.h" 4 5 /** 6 * Allocation sampling 7 */ 8 static const void * __attribute__((overloadable)) 9 getElementAt(rs_allocation a, uint32_t x, uint32_t lod) { 10 Allocation_t *alloc = (Allocation_t *)a.p; 11 const Type_t *type = (const Type_t*)alloc->mHal.state.type; 12 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.mallocPtr; 13 14 const uint32_t offset = type->mHal.state.lodOffset[lod]; 15 const uint32_t eSize = alloc->mHal.state.elementSizeBytes; 16 17 return &p[offset + eSize * x]; 18 } 19 20 static const void * __attribute__((overloadable)) 21 getElementAt(rs_allocation a, uint32_t x, uint32_t y, uint32_t lod) { 22 Allocation_t *alloc = (Allocation_t *)a.p; 23 const Type_t *type = (const Type_t*)alloc->mHal.state.type; 24 const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.mallocPtr; 25 26 const uint32_t eSize = alloc->mHal.state.elementSizeBytes; 27 const uint32_t offset = type->mHal.state.lodOffset[lod]; 28 uint32_t stride; 29 if(lod == 0) { 30 stride = alloc->mHal.drvState.stride; 31 } else { 32 stride = type->mHal.state.lodDimX[lod] * eSize; 33 } 34 35 return &p[offset + (eSize * x) + (y * stride)]; 36 } 37 38 static const void * __attribute__((overloadable)) 39 getElementAt(rs_allocation a, uint2 uv, uint32_t lod) { 40 return getElementAt(a, uv.x, uv.y, lod); 41 } 42 43 static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) { 44 if (wrap == RS_SAMPLER_WRAP) { 45 coord = coord % size; 46 if (coord < 0) { 47 coord += size; 48 } 49 } 50 return (uint32_t)max(0, min(coord, size - 1)); 51 } 52 53 // 565 Conversion bits taken from SkBitmap 54 #define SK_R16_BITS 5 55 #define SK_G16_BITS 6 56 #define SK_B16_BITS 5 57 58 #define SK_R16_SHIFT (SK_B16_BITS + SK_G16_BITS) 59 #define SK_G16_SHIFT (SK_B16_BITS) 60 #define SK_B16_SHIFT 0 61 62 #define SK_R16_MASK ((1 << SK_R16_BITS) - 1) 63 #define SK_G16_MASK ((1 << SK_G16_BITS) - 1) 64 #define SK_B16_MASK ((1 << SK_B16_BITS) - 1) 65 66 #define SkGetPackedR16(color) (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK) 67 #define SkGetPackedG16(color) (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK) 68 #define SkGetPackedB16(color) (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK) 69 70 static inline unsigned SkR16ToR32(unsigned r) { 71 return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8)); 72 } 73 74 static inline unsigned SkG16ToG32(unsigned g) { 75 return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8)); 76 } 77 78 static inline unsigned SkB16ToB32(unsigned b) { 79 return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8)); 80 } 81 82 #define SkPacked16ToR32(c) SkR16ToR32(SkGetPackedR16(c)) 83 #define SkPacked16ToG32(c) SkG16ToG32(SkGetPackedG16(c)) 84 #define SkPacked16ToB32(c) SkB16ToB32(SkGetPackedB16(c)) 85 86 static float3 getFrom565(uint16_t color) { 87 float3 result; 88 result.x = (float)SkPacked16ToR32(color); 89 result.y = (float)SkPacked16ToG32(color); 90 result.z = (float)SkPacked16ToB32(color); 91 return result; 92 } 93 94 #define SAMPLE_1D_FUNC(vecsize, intype, outtype, convert) \ 95 static outtype __attribute__((overloadable)) \ 96 getSample##vecsize(rs_allocation a, float2 weights, \ 97 uint32_t iPixel, uint32_t next, uint32_t lod) { \ 98 intype *p0c = (intype*)getElementAt(a, iPixel, lod); \ 99 intype *p1c = (intype*)getElementAt(a, next, lod); \ 100 outtype p0 = convert(*p0c); \ 101 outtype p1 = convert(*p1c); \ 102 return p0 * weights.x + p1 * weights.y; \ 103 } 104 #define SAMPLE_2D_FUNC(vecsize, intype, outtype, convert) \ 105 static outtype __attribute__((overloadable)) \ 106 getSample##vecsize(rs_allocation a, float4 weights, \ 107 uint2 iPixel, uint2 next, uint32_t lod) { \ 108 intype *p0c = (intype*)getElementAt(a, iPixel.x, iPixel.y, lod); \ 109 intype *p1c = (intype*)getElementAt(a, next.x, iPixel.y, lod); \ 110 intype *p2c = (intype*)getElementAt(a, iPixel.x, next.y, lod); \ 111 intype *p3c = (intype*)getElementAt(a, next.x, next.y, lod); \ 112 outtype p0 = convert(*p0c); \ 113 outtype p1 = convert(*p1c); \ 114 outtype p2 = convert(*p2c); \ 115 outtype p3 = convert(*p3c); \ 116 return p0 * weights.x + p1 * weights.y + p2 * weights.z + p3 * weights.w; \ 117 } 118 119 SAMPLE_1D_FUNC(1, uchar, float, (float)) 120 SAMPLE_1D_FUNC(2, uchar2, float2, convert_float2) 121 SAMPLE_1D_FUNC(3, uchar3, float3, convert_float3) 122 SAMPLE_1D_FUNC(4, uchar4, float4, convert_float4) 123 SAMPLE_1D_FUNC(565, uint16_t, float3, getFrom565) 124 125 SAMPLE_2D_FUNC(1, uchar, float, (float)) 126 SAMPLE_2D_FUNC(2, uchar2, float2, convert_float2) 127 SAMPLE_2D_FUNC(3, uchar3, float3, convert_float3) 128 SAMPLE_2D_FUNC(4, uchar4, float4, convert_float4) 129 SAMPLE_2D_FUNC(565, uint16_t, float3, getFrom565) 130 131 // Sampler function body is the same for all dimensions 132 #define SAMPLE_FUNC_BODY() \ 133 { \ 134 rs_element elem = rsAllocationGetElement(a); \ 135 rs_data_kind dk = rsElementGetDataKind(elem); \ 136 rs_data_type dt = rsElementGetDataType(elem); \ 137 \ 138 if (dk == RS_KIND_USER || (dt != RS_TYPE_UNSIGNED_8 && dt != RS_TYPE_UNSIGNED_5_6_5)) { \ 139 float4 zero = {0.0f, 0.0f, 0.0f, 0.0f}; \ 140 return zero; \ 141 } \ 142 \ 143 uint32_t vecSize = rsElementGetVectorSize(elem); \ 144 Allocation_t *alloc = (Allocation_t *)a.p; \ 145 const Type_t *type = (const Type_t*)alloc->mHal.state.type; \ 146 \ 147 rs_sampler_value sampleMin = rsSamplerGetMinification(s); \ 148 rs_sampler_value sampleMag = rsSamplerGetMagnification(s); \ 149 \ 150 if (lod <= 0.0f) { \ 151 if (sampleMag == RS_SAMPLER_NEAREST) { \ 152 return sample_LOD_NearestPixel(a, type, vecSize, dt, s, uv, 0); \ 153 } \ 154 return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, 0); \ 155 } \ 156 \ 157 if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) { \ 158 uint32_t maxLOD = type->mHal.state.lodCount - 1; \ 159 lod = min(lod, (float)maxLOD); \ 160 uint32_t nearestLOD = (uint32_t)round(lod); \ 161 return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, nearestLOD); \ 162 } \ 163 \ 164 if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) { \ 165 uint32_t lod0 = (uint32_t)floor(lod); \ 166 uint32_t lod1 = (uint32_t)ceil(lod); \ 167 uint32_t maxLOD = type->mHal.state.lodCount - 1; \ 168 lod0 = min(lod0, maxLOD); \ 169 lod1 = min(lod1, maxLOD); \ 170 float4 sample0 = sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, lod0); \ 171 float4 sample1 = sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, lod1); \ 172 float frac = lod - (float)lod0; \ 173 return sample0 * (1.0f - frac) + sample1 * frac; \ 174 } \ 175 \ 176 return sample_LOD_NearestPixel(a, type, vecSize, dt, s, uv, 0); \ 177 } // End of sampler function body is the same for all dimensions 178 179 // Body of the bilinear sampling function 180 #define BILINEAR_SAMPLE_BODY() \ 181 { \ 182 float4 result; \ 183 if (dt == RS_TYPE_UNSIGNED_5_6_5) { \ 184 result.xyz = getSample565(a, weights, iPixel, next, lod); \ 185 return result; \ 186 } \ 187 \ 188 switch(vecSize) { \ 189 case 1: \ 190 result.x = getSample1(a, weights, iPixel, next, lod); \ 191 break; \ 192 case 2: \ 193 result.xy = getSample2(a, weights, iPixel, next, lod); \ 194 break; \ 195 case 3: \ 196 result.xyz = getSample3(a, weights, iPixel, next, lod); \ 197 break; \ 198 case 4: \ 199 result = getSample4(a, weights, iPixel, next, lod); \ 200 break; \ 201 } \ 202 \ 203 return result * 0.003921569f; \ 204 } // End of body of the bilinear sampling function 205 206 // Body of the nearest sampling function 207 #define NEAREST_SAMPLE_BODY() \ 208 { \ 209 float4 result; \ 210 if (dt == RS_TYPE_UNSIGNED_5_6_5) { \ 211 result.xyz = getFrom565(*(uint16_t*)getElementAt(a, iPixel, lod)); \ 212 return result; \ 213 } \ 214 \ 215 switch(vecSize) { \ 216 case 1: \ 217 result.x = (float)(*((uchar*)getElementAt(a, iPixel, lod))); \ 218 break; \ 219 case 2: \ 220 result.xy = convert_float2(*((uchar2*)getElementAt(a, iPixel, lod))); \ 221 break; \ 222 case 3: \ 223 result.xyz = convert_float3(*((uchar3*)getElementAt(a, iPixel, lod))); \ 224 break; \ 225 case 4: \ 226 result = convert_float4(*((uchar4*)getElementAt(a, iPixel, lod))); \ 227 break; \ 228 } \ 229 \ 230 return result * 0.003921569f; \ 231 } // End of body of the nearest sampling function 232 233 static float4 __attribute__((overloadable)) 234 getBilinearSample(rs_allocation a, float2 weights, 235 uint32_t iPixel, uint32_t next, 236 uint32_t vecSize, rs_data_type dt, uint32_t lod) { 237 BILINEAR_SAMPLE_BODY() 238 } 239 240 static float4 __attribute__((overloadable)) 241 getBilinearSample(rs_allocation a, float4 weights, 242 uint2 iPixel, uint2 next, 243 uint32_t vecSize, rs_data_type dt, uint32_t lod) { 244 BILINEAR_SAMPLE_BODY() 245 } 246 247 static float4 __attribute__((overloadable)) 248 getNearestSample(rs_allocation a, uint32_t iPixel, uint32_t vecSize, 249 rs_data_type dt, uint32_t lod) { 250 NEAREST_SAMPLE_BODY() 251 } 252 253 static float4 __attribute__((overloadable)) 254 getNearestSample(rs_allocation a, uint2 iPixel, uint32_t vecSize, 255 rs_data_type dt, uint32_t lod) { 256 NEAREST_SAMPLE_BODY() 257 } 258 259 static float4 __attribute__((overloadable)) 260 sample_LOD_LinearPixel(rs_allocation a, const Type_t *type, 261 uint32_t vecSize, rs_data_type dt, 262 rs_sampler s, 263 float uv, uint32_t lod) { 264 rs_sampler_value wrapS = rsSamplerGetWrapS(s); 265 int32_t sourceW = type->mHal.state.lodDimX[lod]; 266 float pixelUV = uv * (float)(sourceW); 267 int32_t iPixel = (int32_t)(pixelUV); 268 float frac = pixelUV - (float)iPixel; 269 270 if (frac < 0.5f) { 271 iPixel -= 1; 272 frac += 0.5f; 273 } else { 274 frac -= 0.5f; 275 } 276 277 float oneMinusFrac = 1.0f - frac; 278 279 float2 weights; 280 weights.x = oneMinusFrac; 281 weights.y = frac; 282 283 uint32_t next = wrapI(wrapS, iPixel + 1, sourceW); 284 uint32_t location = wrapI(wrapS, iPixel, sourceW); 285 286 return getBilinearSample(a, weights, location, next, vecSize, dt, lod); 287 } 288 289 static float4 __attribute__((overloadable)) 290 sample_LOD_NearestPixel(rs_allocation a, const Type_t *type, 291 uint32_t vecSize, rs_data_type dt, 292 rs_sampler s, 293 float uv, uint32_t lod) { 294 rs_sampler_value wrapS = rsSamplerGetWrapS(s); 295 int32_t sourceW = type->mHal.state.lodDimX[lod]; 296 int32_t iPixel = (int32_t)(uv * (float)(sourceW)); 297 uint32_t location = wrapI(wrapS, iPixel, sourceW); 298 299 return getNearestSample(a, location, vecSize, dt, lod); 300 } 301 302 static float4 __attribute__((overloadable)) 303 sample_LOD_LinearPixel(rs_allocation a, const Type_t *type, 304 uint32_t vecSize, rs_data_type dt, 305 rs_sampler s, 306 float2 uv, uint32_t lod) { 307 rs_sampler_value wrapS = rsSamplerGetWrapS(s); 308 rs_sampler_value wrapT = rsSamplerGetWrapT(s); 309 310 int32_t sourceW = type->mHal.state.lodDimX[lod]; 311 int32_t sourceH = type->mHal.state.lodDimY[lod]; 312 313 float2 dimF; 314 dimF.x = (float)(sourceW); 315 dimF.y = (float)(sourceH); 316 float2 pixelUV = uv * dimF; 317 int2 iPixel = convert_int2(pixelUV); 318 319 float2 frac = pixelUV - convert_float2(iPixel); 320 321 if (frac.x < 0.5f) { 322 iPixel.x -= 1; 323 frac.x += 0.5f; 324 } else { 325 frac.x -= 0.5f; 326 } 327 if (frac.y < 0.5f) { 328 iPixel.y -= 1; 329 frac.y += 0.5f; 330 } else { 331 frac.y -= 0.5f; 332 } 333 float2 oneMinusFrac = 1.0f - frac; 334 335 float4 weights; 336 weights.x = oneMinusFrac.x * oneMinusFrac.y; 337 weights.y = frac.x * oneMinusFrac.y; 338 weights.z = oneMinusFrac.x * frac.y; 339 weights.w = frac.x * frac.y; 340 341 uint2 next; 342 next.x = wrapI(wrapS, iPixel.x + 1, sourceW); 343 next.y = wrapI(wrapT, iPixel.y + 1, sourceH); 344 uint2 location; 345 location.x = wrapI(wrapS, iPixel.x, sourceW); 346 location.y = wrapI(wrapT, iPixel.y, sourceH); 347 348 return getBilinearSample(a, weights, location, next, vecSize, dt, lod); 349 } 350 351 static float4 __attribute__((overloadable)) 352 sample_LOD_NearestPixel(rs_allocation a, const Type_t *type, 353 uint32_t vecSize, rs_data_type dt, 354 rs_sampler s, 355 float2 uv, uint32_t lod) { 356 rs_sampler_value wrapS = rsSamplerGetWrapS(s); 357 rs_sampler_value wrapT = rsSamplerGetWrapT(s); 358 359 int32_t sourceW = type->mHal.state.lodDimX[lod]; 360 int32_t sourceH = type->mHal.state.lodDimY[lod]; 361 362 float2 dimF; 363 dimF.x = (float)(sourceW); 364 dimF.y = (float)(sourceH); 365 int2 iPixel = convert_int2(uv * dimF); 366 367 uint2 location; 368 location.x = wrapI(wrapS, iPixel.x, sourceW); 369 location.y = wrapI(wrapT, iPixel.y, sourceH); 370 return getNearestSample(a, location, vecSize, dt, lod); 371 } 372 373 extern const float4 __attribute__((overloadable)) 374 rsSample(rs_allocation a, rs_sampler s, float location) { 375 return rsSample(a, s, location, 0); 376 } 377 378 extern const float4 __attribute__((overloadable)) 379 rsSample(rs_allocation a, rs_sampler s, float uv, float lod) { 380 SAMPLE_FUNC_BODY() 381 } 382 383 extern const float4 __attribute__((overloadable)) 384 rsSample(rs_allocation a, rs_sampler s, float2 location) { 385 return rsSample(a, s, location, 0.0f); 386 } 387 388 extern const float4 __attribute__((overloadable)) 389 rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) { 390 SAMPLE_FUNC_BODY() 391 } 392