Home | History | Annotate | Download | only in runtime
      1 #include "rs_core.rsh"
      2 #include "rs_graphics.rsh"
      3 #include "rs_structs.h"
      4 
      5 /**
      6 * Allocation sampling
      7 */
      8 static const void * __attribute__((overloadable))
      9         getElementAt(rs_allocation a, uint32_t x, uint32_t lod) {
     10     Allocation_t *alloc = (Allocation_t *)a.p;
     11     const Type_t *type = (const Type_t*)alloc->mHal.state.type;
     12     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.mallocPtr;
     13 
     14     const uint32_t offset = type->mHal.state.lodOffset[lod];
     15     const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
     16 
     17     return &p[offset + eSize * x];
     18 }
     19 
     20 static const void * __attribute__((overloadable))
     21         getElementAt(rs_allocation a, uint32_t x, uint32_t y, uint32_t lod) {
     22     Allocation_t *alloc = (Allocation_t *)a.p;
     23     const Type_t *type = (const Type_t*)alloc->mHal.state.type;
     24     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.mallocPtr;
     25 
     26     const uint32_t eSize = alloc->mHal.state.elementSizeBytes;
     27     const uint32_t offset = type->mHal.state.lodOffset[lod];
     28     uint32_t stride;
     29     if(lod == 0) {
     30         stride = alloc->mHal.drvState.stride;
     31     } else {
     32         stride = type->mHal.state.lodDimX[lod] * eSize;
     33     }
     34 
     35     return &p[offset + (eSize * x) + (y * stride)];
     36 }
     37 
     38 static const void * __attribute__((overloadable))
     39         getElementAt(rs_allocation a, uint2 uv, uint32_t lod) {
     40     return getElementAt(a, uv.x, uv.y, lod);
     41 }
     42 
     43 static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
     44     if (wrap == RS_SAMPLER_WRAP) {
     45         coord = coord % size;
     46         if (coord < 0) {
     47             coord += size;
     48         }
     49     }
     50     return (uint32_t)max(0, min(coord, size - 1));
     51 }
     52 
     53 // 565 Conversion bits taken from SkBitmap
     54 #define SK_R16_BITS     5
     55 #define SK_G16_BITS     6
     56 #define SK_B16_BITS     5
     57 
     58 #define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
     59 #define SK_G16_SHIFT    (SK_B16_BITS)
     60 #define SK_B16_SHIFT    0
     61 
     62 #define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
     63 #define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
     64 #define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)
     65 
     66 #define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
     67 #define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
     68 #define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
     69 
     70 static inline unsigned SkR16ToR32(unsigned r) {
     71     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
     72 }
     73 
     74 static inline unsigned SkG16ToG32(unsigned g) {
     75     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
     76 }
     77 
     78 static inline unsigned SkB16ToB32(unsigned b) {
     79     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
     80 }
     81 
     82 #define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
     83 #define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
     84 #define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
     85 
     86 static float3 getFrom565(uint16_t color) {
     87     float3 result;
     88     result.x = (float)SkPacked16ToR32(color);
     89     result.y = (float)SkPacked16ToG32(color);
     90     result.z = (float)SkPacked16ToB32(color);
     91     return result;
     92 }
     93 
     94 #define SAMPLE_1D_FUNC(vecsize, intype, outtype, convert)                                       \
     95         static outtype __attribute__((overloadable))                                            \
     96                 getSample##vecsize(rs_allocation a, float2 weights,                             \
     97                                    uint32_t iPixel, uint32_t next, uint32_t lod) {              \
     98             intype *p0c = (intype*)getElementAt(a, iPixel, lod);                                \
     99             intype *p1c = (intype*)getElementAt(a, next, lod);                                  \
    100             outtype p0 = convert(*p0c);                                                         \
    101             outtype p1 = convert(*p1c);                                                         \
    102             return p0 * weights.x + p1 * weights.y;                                             \
    103         }
    104 #define SAMPLE_2D_FUNC(vecsize, intype, outtype, convert)                                       \
    105         static outtype __attribute__((overloadable))                                            \
    106                     getSample##vecsize(rs_allocation a, float4 weights,                         \
    107                                        uint2 iPixel, uint2 next, uint32_t lod) {                \
    108             intype *p0c = (intype*)getElementAt(a, iPixel.x, iPixel.y, lod);                    \
    109             intype *p1c = (intype*)getElementAt(a, next.x, iPixel.y, lod);                      \
    110             intype *p2c = (intype*)getElementAt(a, iPixel.x, next.y, lod);                      \
    111             intype *p3c = (intype*)getElementAt(a, next.x, next.y, lod);                        \
    112             outtype p0 = convert(*p0c);                                                         \
    113             outtype p1 = convert(*p1c);                                                         \
    114             outtype p2 = convert(*p2c);                                                         \
    115             outtype p3 = convert(*p3c);                                                         \
    116             return p0 * weights.x + p1 * weights.y + p2 * weights.z + p3 * weights.w;           \
    117         }
    118 
    119 SAMPLE_1D_FUNC(1, uchar, float, (float))
    120 SAMPLE_1D_FUNC(2, uchar2, float2, convert_float2)
    121 SAMPLE_1D_FUNC(3, uchar3, float3, convert_float3)
    122 SAMPLE_1D_FUNC(4, uchar4, float4, convert_float4)
    123 SAMPLE_1D_FUNC(565, uint16_t, float3, getFrom565)
    124 
    125 SAMPLE_2D_FUNC(1, uchar, float, (float))
    126 SAMPLE_2D_FUNC(2, uchar2, float2, convert_float2)
    127 SAMPLE_2D_FUNC(3, uchar3, float3, convert_float3)
    128 SAMPLE_2D_FUNC(4, uchar4, float4, convert_float4)
    129 SAMPLE_2D_FUNC(565, uint16_t, float3, getFrom565)
    130 
    131 // Sampler function body is the same for all dimensions
    132 #define SAMPLE_FUNC_BODY()                                                                      \
    133 {                                                                                               \
    134     rs_element elem = rsAllocationGetElement(a);                                                \
    135     rs_data_kind dk = rsElementGetDataKind(elem);                                               \
    136     rs_data_type dt = rsElementGetDataType(elem);                                               \
    137                                                                                                 \
    138     if (dk == RS_KIND_USER || (dt != RS_TYPE_UNSIGNED_8 && dt != RS_TYPE_UNSIGNED_5_6_5)) {     \
    139         float4 zero = {0.0f, 0.0f, 0.0f, 0.0f};                                                 \
    140         return zero;                                                                            \
    141     }                                                                                           \
    142                                                                                                 \
    143     uint32_t vecSize = rsElementGetVectorSize(elem);                                            \
    144     Allocation_t *alloc = (Allocation_t *)a.p;                                                  \
    145     const Type_t *type = (const Type_t*)alloc->mHal.state.type;                                 \
    146                                                                                                 \
    147     rs_sampler_value sampleMin = rsSamplerGetMinification(s);                                  \
    148     rs_sampler_value sampleMag = rsSamplerGetMagnification(s);                                 \
    149                                                                                                 \
    150     if (lod <= 0.0f) {                                                                          \
    151         if (sampleMag == RS_SAMPLER_NEAREST) {                                                  \
    152             return sample_LOD_NearestPixel(a, type, vecSize, dt, s, uv, 0);                     \
    153         }                                                                                       \
    154         return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, 0);                          \
    155     }                                                                                           \
    156                                                                                                 \
    157     if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {                                           \
    158         uint32_t maxLOD = type->mHal.state.lodCount - 1;                                        \
    159         lod = min(lod, (float)maxLOD);                                                          \
    160         uint32_t nearestLOD = (uint32_t)round(lod);                                             \
    161         return sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, nearestLOD);                 \
    162     }                                                                                           \
    163                                                                                                 \
    164     if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {                                            \
    165         uint32_t lod0 = (uint32_t)floor(lod);                                                   \
    166         uint32_t lod1 = (uint32_t)ceil(lod);                                                    \
    167         uint32_t maxLOD = type->mHal.state.lodCount - 1;                                        \
    168         lod0 = min(lod0, maxLOD);                                                               \
    169         lod1 = min(lod1, maxLOD);                                                               \
    170         float4 sample0 = sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, lod0);             \
    171         float4 sample1 = sample_LOD_LinearPixel(a, type, vecSize, dt, s, uv, lod1);             \
    172         float frac = lod - (float)lod0;                                                         \
    173         return sample0 * (1.0f - frac) + sample1 * frac;                                        \
    174     }                                                                                           \
    175                                                                                                 \
    176     return sample_LOD_NearestPixel(a, type, vecSize, dt, s, uv, 0);                             \
    177 } // End of sampler function body is the same for all dimensions
    178 
    179 // Body of the bilinear sampling function
    180 #define BILINEAR_SAMPLE_BODY()                                                                  \
    181 {                                                                                               \
    182     float4 result;                                                                              \
    183     if (dt == RS_TYPE_UNSIGNED_5_6_5) {                                                         \
    184         result.xyz = getSample565(a, weights, iPixel, next, lod);                               \
    185         return result;                                                                          \
    186     }                                                                                           \
    187                                                                                                 \
    188     switch(vecSize) {                                                                           \
    189     case 1:                                                                                     \
    190         result.x = getSample1(a, weights, iPixel, next, lod);                                   \
    191         break;                                                                                  \
    192     case 2:                                                                                     \
    193         result.xy = getSample2(a, weights, iPixel, next, lod);                                  \
    194         break;                                                                                  \
    195     case 3:                                                                                     \
    196         result.xyz = getSample3(a, weights, iPixel, next, lod);                                 \
    197         break;                                                                                  \
    198     case 4:                                                                                     \
    199         result = getSample4(a, weights, iPixel, next, lod);                                     \
    200         break;                                                                                  \
    201     }                                                                                           \
    202                                                                                                 \
    203     return result * 0.003921569f;                                                                              \
    204 } // End of body of the bilinear sampling function
    205 
    206 // Body of the nearest sampling function
    207 #define NEAREST_SAMPLE_BODY()                                                                   \
    208 {                                                                                               \
    209     float4 result;                                                                              \
    210     if (dt == RS_TYPE_UNSIGNED_5_6_5) {                                                         \
    211         result.xyz = getFrom565(*(uint16_t*)getElementAt(a, iPixel, lod));                      \
    212        return result;                                                                           \
    213     }                                                                                           \
    214                                                                                                 \
    215     switch(vecSize) {                                                                           \
    216     case 1:                                                                                     \
    217         result.x = (float)(*((uchar*)getElementAt(a, iPixel, lod)));                            \
    218         break;                                                                                  \
    219     case 2:                                                                                     \
    220         result.xy = convert_float2(*((uchar2*)getElementAt(a, iPixel, lod)));                   \
    221         break;                                                                                  \
    222     case 3:                                                                                     \
    223         result.xyz = convert_float3(*((uchar3*)getElementAt(a, iPixel, lod)));                  \
    224         break;                                                                                  \
    225     case 4:                                                                                     \
    226         result = convert_float4(*((uchar4*)getElementAt(a, iPixel, lod)));                      \
    227         break;                                                                                  \
    228     }                                                                                           \
    229                                                                                                 \
    230     return result * 0.003921569f;                                                                              \
    231 } // End of body of the nearest sampling function
    232 
    233 static float4 __attribute__((overloadable))
    234         getBilinearSample(rs_allocation a, float2 weights,
    235                           uint32_t iPixel, uint32_t next,
    236                           uint32_t vecSize, rs_data_type dt, uint32_t lod) {
    237     BILINEAR_SAMPLE_BODY()
    238 }
    239 
    240 static float4 __attribute__((overloadable))
    241         getBilinearSample(rs_allocation a, float4 weights,
    242                           uint2 iPixel, uint2 next,
    243                           uint32_t vecSize, rs_data_type dt, uint32_t lod) {
    244     BILINEAR_SAMPLE_BODY()
    245 }
    246 
    247 static float4  __attribute__((overloadable))
    248         getNearestSample(rs_allocation a, uint32_t iPixel, uint32_t vecSize,
    249                          rs_data_type dt, uint32_t lod) {
    250     NEAREST_SAMPLE_BODY()
    251 }
    252 
    253 static float4  __attribute__((overloadable))
    254         getNearestSample(rs_allocation a, uint2 iPixel, uint32_t vecSize,
    255                          rs_data_type dt, uint32_t lod) {
    256     NEAREST_SAMPLE_BODY()
    257 }
    258 
    259 static float4 __attribute__((overloadable))
    260         sample_LOD_LinearPixel(rs_allocation a, const Type_t *type,
    261                                uint32_t vecSize, rs_data_type dt,
    262                                rs_sampler s,
    263                                float uv, uint32_t lod) {
    264     rs_sampler_value wrapS = rsSamplerGetWrapS(s);
    265     int32_t sourceW = type->mHal.state.lodDimX[lod];
    266     float pixelUV = uv * (float)(sourceW);
    267     int32_t iPixel = (int32_t)(pixelUV);
    268     float frac = pixelUV - (float)iPixel;
    269 
    270     if (frac < 0.5f) {
    271         iPixel -= 1;
    272         frac += 0.5f;
    273     } else {
    274         frac -= 0.5f;
    275     }
    276 
    277     float oneMinusFrac = 1.0f - frac;
    278 
    279     float2 weights;
    280     weights.x = oneMinusFrac;
    281     weights.y = frac;
    282 
    283     uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
    284     uint32_t location = wrapI(wrapS, iPixel, sourceW);
    285 
    286     return getBilinearSample(a, weights, location, next, vecSize, dt, lod);
    287 }
    288 
    289 static float4 __attribute__((overloadable))
    290         sample_LOD_NearestPixel(rs_allocation a, const Type_t *type,
    291                                 uint32_t vecSize, rs_data_type dt,
    292                                 rs_sampler s,
    293                                 float uv, uint32_t lod) {
    294     rs_sampler_value wrapS = rsSamplerGetWrapS(s);
    295     int32_t sourceW = type->mHal.state.lodDimX[lod];
    296     int32_t iPixel = (int32_t)(uv * (float)(sourceW));
    297     uint32_t location = wrapI(wrapS, iPixel, sourceW);
    298 
    299     return getNearestSample(a, location, vecSize, dt, lod);
    300 }
    301 
    302 static float4 __attribute__((overloadable))
    303         sample_LOD_LinearPixel(rs_allocation a, const Type_t *type,
    304                                uint32_t vecSize, rs_data_type dt,
    305                                rs_sampler s,
    306                                float2 uv, uint32_t lod) {
    307     rs_sampler_value wrapS = rsSamplerGetWrapS(s);
    308     rs_sampler_value wrapT = rsSamplerGetWrapT(s);
    309 
    310     int32_t sourceW = type->mHal.state.lodDimX[lod];
    311     int32_t sourceH = type->mHal.state.lodDimY[lod];
    312 
    313     float2 dimF;
    314     dimF.x = (float)(sourceW);
    315     dimF.y = (float)(sourceH);
    316     float2 pixelUV = uv * dimF;
    317     int2 iPixel = convert_int2(pixelUV);
    318 
    319     float2 frac = pixelUV - convert_float2(iPixel);
    320 
    321     if (frac.x < 0.5f) {
    322         iPixel.x -= 1;
    323         frac.x += 0.5f;
    324     } else {
    325         frac.x -= 0.5f;
    326     }
    327     if (frac.y < 0.5f) {
    328         iPixel.y -= 1;
    329         frac.y += 0.5f;
    330     } else {
    331         frac.y -= 0.5f;
    332     }
    333     float2 oneMinusFrac = 1.0f - frac;
    334 
    335     float4 weights;
    336     weights.x = oneMinusFrac.x * oneMinusFrac.y;
    337     weights.y = frac.x * oneMinusFrac.y;
    338     weights.z = oneMinusFrac.x * frac.y;
    339     weights.w = frac.x * frac.y;
    340 
    341     uint2 next;
    342     next.x = wrapI(wrapS, iPixel.x + 1, sourceW);
    343     next.y = wrapI(wrapT, iPixel.y + 1, sourceH);
    344     uint2 location;
    345     location.x = wrapI(wrapS, iPixel.x, sourceW);
    346     location.y = wrapI(wrapT, iPixel.y, sourceH);
    347 
    348     return getBilinearSample(a, weights, location, next, vecSize, dt, lod);
    349 }
    350 
    351 static float4 __attribute__((overloadable))
    352         sample_LOD_NearestPixel(rs_allocation a, const Type_t *type,
    353                                 uint32_t vecSize, rs_data_type dt,
    354                                 rs_sampler s,
    355                                 float2 uv, uint32_t lod) {
    356     rs_sampler_value wrapS = rsSamplerGetWrapS(s);
    357     rs_sampler_value wrapT = rsSamplerGetWrapT(s);
    358 
    359     int32_t sourceW = type->mHal.state.lodDimX[lod];
    360     int32_t sourceH = type->mHal.state.lodDimY[lod];
    361 
    362     float2 dimF;
    363     dimF.x = (float)(sourceW);
    364     dimF.y = (float)(sourceH);
    365     int2 iPixel = convert_int2(uv * dimF);
    366 
    367     uint2 location;
    368     location.x = wrapI(wrapS, iPixel.x, sourceW);
    369     location.y = wrapI(wrapT, iPixel.y, sourceH);
    370     return getNearestSample(a, location, vecSize, dt, lod);
    371 }
    372 
    373 extern const float4 __attribute__((overloadable))
    374         rsSample(rs_allocation a, rs_sampler s, float location) {
    375     return rsSample(a, s, location, 0);
    376 }
    377 
    378 extern const float4 __attribute__((overloadable))
    379         rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
    380     SAMPLE_FUNC_BODY()
    381 }
    382 
    383 extern const float4 __attribute__((overloadable))
    384         rsSample(rs_allocation a, rs_sampler s, float2 location) {
    385     return rsSample(a, s, location, 0.0f);
    386 }
    387 
    388 extern const float4 __attribute__((overloadable))
    389         rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
    390     SAMPLE_FUNC_BODY()
    391 }
    392