Home | History | Annotate | Download | only in runtime
      1 #include "rs_core.rsh"
      2 #include "rs_structs.h"
      3 
      4 
      5 // 565 Conversion bits taken from SkBitmap
      6 #define SK_R16_BITS     5
      7 #define SK_G16_BITS     6
      8 #define SK_B16_BITS     5
      9 
     10 #define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
     11 #define SK_G16_SHIFT    (SK_B16_BITS)
     12 #define SK_B16_SHIFT    0
     13 
     14 #define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
     15 #define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
     16 #define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)
     17 
     18 #define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
     19 #define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
     20 #define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
     21 
     22 static inline unsigned SkR16ToR32(unsigned r) {
     23     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
     24 }
     25 
     26 static inline unsigned SkG16ToG32(unsigned g) {
     27     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
     28 }
     29 
     30 static inline unsigned SkB16ToB32(unsigned b) {
     31     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
     32 }
     33 
     34 #define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
     35 #define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
     36 #define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
     37 
     38 static float3 getFrom565(uint16_t color) {
     39     float3 result;
     40     result.x = (float)SkPacked16ToR32(color);
     41     result.y = (float)SkPacked16ToG32(color);
     42     result.z = (float)SkPacked16ToB32(color);
     43     return result;
     44 }
     45 
     46 /**
     47 * Allocation sampling
     48 */
     49 static inline float __attribute__((overloadable))
     50         getElementAt1(const uint8_t *p, int32_t x) {
     51     float r = p[x];
     52     return r;
     53 }
     54 
     55 static inline float2 __attribute__((overloadable))
     56         getElementAt2(const uint8_t *p, int32_t x) {
     57     x *= 2;
     58     float2 r = {p[x], p[x+1]};
     59     return r;
     60 }
     61 
     62 static inline float3 __attribute__((overloadable))
     63         getElementAt3(const uint8_t *p, int32_t x) {
     64     x *= 4;
     65     float3 r = {p[x], p[x+1], p[x+2]};
     66     return r;
     67 }
     68 
     69 static inline float4 __attribute__((overloadable))
     70         getElementAt4(const uint8_t *p, int32_t x) {
     71     x *= 4;
     72     const uchar4 *p2 = (const uchar4 *)&p[x];
     73     return convert_float4(p2[0]);
     74 }
     75 
     76 static inline float3 __attribute__((overloadable))
     77         getElementAt565(const uint8_t *p, int32_t x) {
     78     x *= 2;
     79     float3 r = getFrom565(((const uint16_t *)p)[0]);
     80     return r;
     81 }
     82 
     83 static inline float __attribute__((overloadable))
     84         getElementAt1(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
     85     p += y * stride;
     86     float r = p[x];
     87     return r;
     88 }
     89 
     90 static inline float2 __attribute__((overloadable))
     91         getElementAt2(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
     92     p += y * stride;
     93     x *= 2;
     94     float2 r = {p[x], p[x+1]};
     95     return r;
     96 }
     97 
     98 static inline float3 __attribute__((overloadable))
     99         getElementAt3(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    100     p += y * stride;
    101     x *= 4;
    102     float3 r = {p[x], p[x+1], p[x+2]};
    103     return r;
    104 }
    105 
    106 static inline float4 __attribute__((overloadable))
    107         getElementAt4(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    108     p += y * stride;
    109     x *= 4;
    110     float4 r = {p[x], p[x+1], p[x+2], p[x+3]};
    111     return r;
    112 }
    113 
    114 static inline float3 __attribute__((overloadable))
    115         getElementAt565(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    116     p += y * stride;
    117     x *= 2;
    118     float3 r = getFrom565(((const uint16_t *)p)[0]);
    119     return r;
    120 }
    121 
    122 
    123 
    124 
    125 
    126 static float4 __attribute__((overloadable))
    127             getSample_A(const uint8_t *p, int32_t iPixel,
    128                           int32_t next, float w0, float w1) {
    129     float p0 = getElementAt1(p, iPixel);
    130     float p1 = getElementAt1(p, next);
    131     float r = p0 * w0 + p1 * w1;
    132     r *= (1.f / 255.f);
    133     float4 ret = {0.f, 0.f, 0.f, r};
    134     return ret;
    135 }
    136 static float4 __attribute__((overloadable))
    137             getSample_L(const uint8_t *p, int32_t iPixel,
    138                           int32_t next, float w0, float w1) {
    139     float p0 = getElementAt1(p, iPixel);
    140     float p1 = getElementAt1(p, next);
    141     float r = p0 * w0 + p1 * w1;
    142     r *= (1.f / 255.f);
    143     float4 ret = {r, r, r, 1.f};
    144     return ret;
    145 }
    146 static float4 __attribute__((overloadable))
    147             getSample_LA(const uint8_t *p, int32_t iPixel,
    148                            int32_t next, float w0, float w1) {
    149     float2 p0 = getElementAt2(p, iPixel);
    150     float2 p1 = getElementAt2(p, next);
    151     float2 r = p0 * w0 + p1 * w1;
    152     r *= (1.f / 255.f);
    153     float4 ret = {r.x, r.x, r.x, r.y};
    154     return ret;
    155 }
    156 static float4 __attribute__((overloadable))
    157             getSample_RGB(const uint8_t *p, int32_t iPixel,
    158                             int32_t next, float w0, float w1) {
    159     float3 p0 = getElementAt3(p, iPixel);
    160     float3 p1 = getElementAt3(p, next);
    161     float3 r = p0 * w0 + p1 * w1;
    162     r *= (1.f / 255.f);
    163     float4 ret = {r.x, r.x, r.z, 1.f};
    164     return ret;
    165 }
    166 static float4 __attribute__((overloadable))
    167             getSample_565(const uint8_t *p, int32_t iPixel,
    168                            int32_t next, float w0, float w1) {
    169     float3 p0 = getElementAt565(p, iPixel);
    170     float3 p1 = getElementAt565(p, next);
    171     float3 r = p0 * w0 + p1 * w1;
    172     r *= (1.f / 255.f);
    173     float4 ret = {r.x, r.x, r.z, 1.f};
    174     return ret;
    175 }
    176 static float4 __attribute__((overloadable))
    177             getSample_RGBA(const uint8_t *p, int32_t iPixel,
    178                              int32_t next, float w0, float w1) {
    179     float4 p0 = getElementAt4(p, iPixel);
    180     float4 p1 = getElementAt4(p, next);
    181     float4 r = p0 * w0 + p1 * w1;
    182     r *= (1.f / 255.f);
    183     return r;
    184 }
    185 
    186 
    187 static float4 __attribute__((overloadable))
    188             getSample_A(const uint8_t *p, size_t stride,
    189                           int locX, int locY, int nextX, int nextY,
    190                           float w0, float w1, float w2, float w3) {
    191     float p0 = getElementAt1(p, stride, locX, locY);
    192     float p1 = getElementAt1(p, stride, nextX, locY);
    193     float p2 = getElementAt1(p, stride, locX, nextY);
    194     float p3 = getElementAt1(p, stride, nextX, nextY);
    195     float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
    196     r *= (1.f / 255.f);
    197     float4 ret = {0.f, 0.f, 0.f, r};
    198     return ret;
    199 }
    200 static float4 __attribute__((overloadable))
    201             getSample_L(const uint8_t *p, size_t stride,
    202                          int locX, int locY, int nextX, int nextY,
    203                          float w0, float w1, float w2, float w3) {
    204     float p0 = getElementAt1(p, stride, locX, locY);
    205     float p1 = getElementAt1(p, stride, nextX, locY);
    206     float p2 = getElementAt1(p, stride, locX, nextY);
    207     float p3 = getElementAt1(p, stride, nextX, nextY);
    208     float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
    209     r *= (1.f / 255.f);
    210     float4 ret = {r, r, r, 1.f};
    211     return ret;
    212 }
    213 static float4 __attribute__((overloadable))
    214             getSample_LA(const uint8_t *p, size_t stride,
    215                          int locX, int locY, int nextX, int nextY,
    216                          float w0, float w1, float w2, float w3) {
    217     float2 p0 = getElementAt2(p, stride, locX, locY);
    218     float2 p1 = getElementAt2(p, stride, nextX, locY);
    219     float2 p2 = getElementAt2(p, stride, locX, nextY);
    220     float2 p3 = getElementAt2(p, stride, nextX, nextY);
    221     float2 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
    222     r *= (1.f / 255.f);
    223     float4 ret = {r.x, r.x, r.x, r.y};
    224     return ret;
    225 }
    226 static float4 __attribute__((overloadable))
    227             getSample_RGB(const uint8_t *p, size_t stride,
    228                          int locX, int locY, int nextX, int nextY,
    229                          float w0, float w1, float w2, float w3) {
    230     float4 p0 = getElementAt4(p, stride, locX, locY);
    231     float4 p1 = getElementAt4(p, stride, nextX, locY);
    232     float4 p2 = getElementAt4(p, stride, locX, nextY);
    233     float4 p3 = getElementAt4(p, stride, nextX, nextY);
    234     float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
    235     r *= (1.f / 255.f);
    236     float4 ret = {r.x, r.y, r.z, 1.f};
    237     return ret;
    238 }
    239 static float4 __attribute__((overloadable))
    240             getSample_RGBA(const uint8_t *p, size_t stride,
    241                          int locX, int locY, int nextX, int nextY,
    242                          float w0, float w1, float w2, float w3) {
    243     float4 p0 = getElementAt4(p, stride, locX, locY);
    244     float4 p1 = getElementAt4(p, stride, nextX, locY);
    245     float4 p2 = getElementAt4(p, stride, locX, nextY);
    246     float4 p3 = getElementAt4(p, stride, nextX, nextY);
    247     float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
    248     r *= (1.f / 255.f);
    249     return r;
    250 }
    251 static float4 __attribute__((overloadable))
    252             getSample_565(const uint8_t *p, size_t stride,
    253                          int locX, int locY, int nextX, int nextY,
    254                          float w0, float w1, float w2, float w3) {
    255     float3 p0 = getElementAt565(p, stride, locX, locY);
    256     float3 p1 = getElementAt565(p, stride, nextX, locY);
    257     float3 p2 = getElementAt565(p, stride, locX, nextY);
    258     float3 p3 = getElementAt565(p, stride, nextX, nextY);
    259     float3 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
    260     r *= (1.f / 255.f);
    261     float4 ret;
    262     ret.rgb = r;
    263     ret.w = 1.f;
    264     return ret;
    265 }
    266 
    267 static float4 __attribute__((overloadable))
    268         getBilinearSample1D(const Allocation_t *alloc, float2 weights,
    269                           uint32_t iPixel, uint32_t next,
    270                           rs_data_kind dk, rs_data_type dt, uint32_t lod) {
    271 
    272      const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
    273 
    274      switch(dk) {
    275      case RS_KIND_PIXEL_RGBA:
    276          return getSample_RGBA(p, iPixel, next, weights.x, weights.y);
    277      case RS_KIND_PIXEL_A:
    278          return getSample_A(p, iPixel, next, weights.x, weights.y);
    279      case RS_KIND_PIXEL_RGB:
    280          if (dt == RS_TYPE_UNSIGNED_5_6_5) {
    281              return getSample_565(p, iPixel, next, weights.x, weights.y);
    282          }
    283          return getSample_RGB(p, iPixel, next, weights.x, weights.y);
    284      case RS_KIND_PIXEL_L:
    285          return getSample_L(p, iPixel, next, weights.x, weights.y);
    286      case RS_KIND_PIXEL_LA:
    287          return getSample_LA(p, iPixel, next, weights.x, weights.y);
    288 
    289      default:
    290          //__builtin_unreachable();
    291          break;
    292      }
    293 
    294      //__builtin_unreachable();
    295      return 0.f;
    296 }
    297 
    298 static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
    299     if (wrap == RS_SAMPLER_WRAP) {
    300         coord = coord % size;
    301         if (coord < 0) {
    302             coord += size;
    303         }
    304     }
    305     if (wrap == RS_SAMPLER_MIRRORED_REPEAT) {
    306         coord = coord % (size * 2);
    307         if (coord < 0) {
    308             coord = (size * 2) + coord;
    309         }
    310         if (coord >= size) {
    311             coord = (size * 2 - 1) - coord;
    312         }
    313     }
    314     return (uint32_t)max(0, min(coord, size - 1));
    315 }
    316 
    317 static float4 __attribute__((overloadable))
    318         getBilinearSample2D(const Allocation_t *alloc, float w0, float w1, float w2, float w3,
    319                           int lx, int ly, int nx, int ny,
    320                           rs_data_kind dk, rs_data_type dt, uint32_t lod) {
    321 
    322     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
    323     size_t stride = alloc->mHal.drvState.lod[lod].stride;
    324 
    325     switch(dk) {
    326     case RS_KIND_PIXEL_RGBA:
    327         return getSample_RGBA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
    328     case RS_KIND_PIXEL_A:
    329         return getSample_A(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
    330     case RS_KIND_PIXEL_LA:
    331         return getSample_LA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
    332     case RS_KIND_PIXEL_RGB:
    333         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
    334             return getSample_565(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
    335         }
    336         return getSample_RGB(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
    337     case RS_KIND_PIXEL_L:
    338         return getSample_L(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
    339 
    340     default:
    341         break;
    342     }
    343 
    344     return 0.f;
    345 }
    346 
    347 static float4  __attribute__((overloadable))
    348         getNearestSample(const Allocation_t *alloc, uint32_t iPixel, rs_data_kind dk,
    349                          rs_data_type dt, uint32_t lod) {
    350 
    351     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
    352 
    353     float4 result = {0.f, 0.f, 0.f, 255.f};
    354 
    355     switch(dk) {
    356     case RS_KIND_PIXEL_RGBA:
    357         result = getElementAt4(p, iPixel);
    358         break;
    359     case RS_KIND_PIXEL_A:
    360         result.w = getElementAt1(p, iPixel);
    361         break;
    362     case RS_KIND_PIXEL_LA:
    363         result.zw = getElementAt2(p, iPixel);
    364         result.xy = result.z;
    365         break;
    366     case RS_KIND_PIXEL_RGB:
    367         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
    368             result.xyz = getElementAt565(p, iPixel);
    369         } else {
    370             result.xyz = getElementAt3(p, iPixel);
    371         }
    372         break;
    373     case RS_KIND_PIXEL_L:
    374         result.xyz = getElementAt1(p, iPixel);
    375 
    376     default:
    377         //__builtin_unreachable();
    378         break;
    379     }
    380 
    381     return result * 0.003921569f;
    382 }
    383 
    384 static float4  __attribute__((overloadable))
    385         getNearestSample(const Allocation_t *alloc, uint2 iPixel, rs_data_kind dk,
    386                          rs_data_type dt, uint32_t lod) {
    387 
    388     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
    389     size_t stride = alloc->mHal.drvState.lod[lod].stride;
    390 
    391     float4 result = {0.f, 0.f, 0.f, 255.f};
    392 
    393     switch(dk) {
    394     case RS_KIND_PIXEL_RGBA:
    395         result = getElementAt4(p, stride, iPixel.x, iPixel.y);
    396         break;
    397     case RS_KIND_PIXEL_A:
    398         result.w = getElementAt1(p, stride, iPixel.x, iPixel.y);
    399         break;
    400     case RS_KIND_PIXEL_LA:
    401         result.zw = getElementAt2(p, stride, iPixel.x, iPixel.y);
    402         result.xy = result.z;
    403         break;
    404     case RS_KIND_PIXEL_RGB:
    405         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
    406             result.xyz = getElementAt565(p, stride, iPixel.x, iPixel.y);
    407         } else {
    408             result.xyz = getElementAt3(p, stride, iPixel.x, iPixel.y);
    409         }
    410         break;
    411 
    412     default:
    413         //__builtin_unreachable();
    414         break;
    415     }
    416 
    417     return result * 0.003921569f;
    418 }
    419 
    420 static float4 __attribute__((overloadable))
    421         sample_LOD_LinearPixel(const Allocation_t *alloc,
    422                                rs_data_kind dk, rs_data_type dt,
    423                                rs_sampler_value wrapS,
    424                                float uv, uint32_t lod) {
    425 
    426     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
    427 
    428     int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
    429     float pixelUV = uv * (float)(sourceW);
    430     int32_t iPixel = floor(pixelUV);
    431     float frac = pixelUV - (float)iPixel;
    432 
    433     if (frac < 0.5f) {
    434         iPixel -= 1;
    435         frac += 0.5f;
    436     } else {
    437         frac -= 0.5f;
    438     }
    439 
    440     float oneMinusFrac = 1.0f - frac;
    441 
    442     float2 weights;
    443     weights.x = oneMinusFrac;
    444     weights.y = frac;
    445 
    446     uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
    447     uint32_t location = wrapI(wrapS, iPixel, sourceW);
    448 
    449     return getBilinearSample1D(alloc, weights, location, next, dk, dt, lod);
    450 }
    451 
    452 static float4 __attribute__((overloadable))
    453         sample_LOD_NearestPixel(const Allocation_t *alloc,
    454                                 rs_data_kind dk, rs_data_type dt,
    455                                 rs_sampler_value wrapS,
    456                                 float uv, uint32_t lod) {
    457 
    458     int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
    459     int32_t iPixel = floor(uv * (float)(sourceW));
    460     uint32_t location = wrapI(wrapS, iPixel, sourceW);
    461 
    462     return getNearestSample(alloc, location, dk, dt, lod);
    463 }
    464 
    465 static float4 __attribute__((overloadable))
    466         sample_LOD_LinearPixel(const Allocation_t *alloc,
    467                                rs_data_kind dk, rs_data_type dt,
    468                                rs_sampler_value wrapS,
    469                                rs_sampler_value wrapT,
    470                                float2 uv, uint32_t lod) {
    471 
    472     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
    473 
    474     int sourceW = alloc->mHal.drvState.lod[lod].dimX;
    475     int sourceH = alloc->mHal.drvState.lod[lod].dimY;
    476 
    477     float pixelU = uv.x * sourceW;
    478     float pixelV = uv.y * sourceH;
    479     int iPixelU = floor(pixelU);
    480     int iPixelV = floor(pixelV);
    481     float fracU = pixelU - iPixelU;
    482     float fracV = pixelV - iPixelV;
    483 
    484     if (fracU < 0.5f) {
    485         iPixelU -= 1;
    486         fracU += 0.5f;
    487     } else {
    488         fracU -= 0.5f;
    489     }
    490     if (fracV < 0.5f) {
    491         iPixelV -= 1;
    492         fracV += 0.5f;
    493     } else {
    494         fracV -= 0.5f;
    495     }
    496     float oneMinusFracU = 1.0f - fracU;
    497     float oneMinusFracV = 1.0f - fracV;
    498 
    499     float w0 = oneMinusFracU * oneMinusFracV;
    500     float w1 = fracU * oneMinusFracV;
    501     float w2 = oneMinusFracU * fracV;
    502     float w3 = fracU * fracV;
    503 
    504     int nx = wrapI(wrapS, iPixelU + 1, sourceW);
    505     int ny = wrapI(wrapT, iPixelV + 1, sourceH);
    506     int lx = wrapI(wrapS, iPixelU, sourceW);
    507     int ly = wrapI(wrapT, iPixelV, sourceH);
    508 
    509     return getBilinearSample2D(alloc, w0, w1, w2, w3, lx, ly, nx, ny, dk, dt, lod);
    510 
    511 }
    512 
    513 static float4 __attribute__((overloadable))
    514         sample_LOD_NearestPixel(const Allocation_t *alloc,
    515                                 rs_data_kind dk, rs_data_type dt,
    516                                 rs_sampler_value wrapS,
    517                                 rs_sampler_value wrapT,
    518                                 float2 uv, uint32_t lod) {
    519     int sourceW = alloc->mHal.drvState.lod[lod].dimX;
    520     int sourceH = alloc->mHal.drvState.lod[lod].dimY;
    521 
    522     float2 dimF;
    523     dimF.x = (float)(sourceW);
    524     dimF.y = (float)(sourceH);
    525     int2 iPixel = convert_int2(floor(uv * dimF));
    526 
    527     uint2 location;
    528     location.x = wrapI(wrapS, iPixel.x, sourceW);
    529     location.y = wrapI(wrapT, iPixel.y, sourceH);
    530     return getNearestSample(alloc, location, dk, dt, lod);
    531 }
    532 
    533 extern float4 __attribute__((overloadable))
    534         rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
    535 
    536     const Allocation_t *alloc = (const Allocation_t *)a.p;
    537     const Sampler_t *prog = (Sampler_t *)s.p;
    538     const Type_t *type = (Type_t *)alloc->mHal.state.type;
    539     const Element_t *elem = type->mHal.state.element;
    540     rs_data_kind dk = elem->mHal.state.dataKind;
    541     rs_data_type dt = elem->mHal.state.dataType;
    542     rs_sampler_value sampleMin = prog->mHal.state.minFilter;
    543     rs_sampler_value sampleMag = prog->mHal.state.magFilter;
    544     rs_sampler_value wrapS = prog->mHal.state.wrapS;
    545 
    546     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
    547         return 0.f;
    548     }
    549 
    550     if (lod <= 0.0f) {
    551         if (sampleMag == RS_SAMPLER_NEAREST) {
    552             return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
    553         }
    554         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, 0);
    555     }
    556 
    557     if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
    558         uint32_t maxLOD = type->mHal.state.lodCount - 1;
    559         lod = min(lod, (float)maxLOD);
    560         uint32_t nearestLOD = (uint32_t)round(lod);
    561         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, nearestLOD);
    562     }
    563 
    564     if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
    565         uint32_t lod0 = (uint32_t)floor(lod);
    566         uint32_t lod1 = (uint32_t)ceil(lod);
    567         uint32_t maxLOD = type->mHal.state.lodCount - 1;
    568         lod0 = min(lod0, maxLOD);
    569         lod1 = min(lod1, maxLOD);
    570         float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod0);
    571         float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod1);
    572         float frac = lod - (float)lod0;
    573         return sample0 * (1.0f - frac) + sample1 * frac;
    574     }
    575 
    576     return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
    577 }
    578 
    579 extern float4 __attribute__((overloadable))
    580         rsSample(rs_allocation a, rs_sampler s, float location) {
    581     return rsSample(a, s, location, 0);
    582 }
    583 
    584 
    585 extern float4 __attribute__((overloadable))
    586         rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
    587 
    588     const Allocation_t *alloc = (const Allocation_t *)a.p;
    589     const Sampler_t *prog = (Sampler_t *)s.p;
    590     const Type_t *type = (Type_t *)alloc->mHal.state.type;
    591     const Element_t *elem = type->mHal.state.element;
    592     rs_data_kind dk = elem->mHal.state.dataKind;
    593     rs_data_type dt = elem->mHal.state.dataType;
    594     rs_sampler_value sampleMin = prog->mHal.state.minFilter;
    595     rs_sampler_value sampleMag = prog->mHal.state.magFilter;
    596     rs_sampler_value wrapS = prog->mHal.state.wrapS;
    597     rs_sampler_value wrapT = prog->mHal.state.wrapT;
    598 
    599     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
    600         return 0.f;
    601     }
    602 
    603     if (lod <= 0.0f) {
    604         if (sampleMag == RS_SAMPLER_NEAREST) {
    605             return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
    606         }
    607         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
    608     }
    609 
    610     if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
    611         uint32_t maxLOD = type->mHal.state.lodCount - 1;
    612         lod = min(lod, (float)maxLOD);
    613         uint32_t nearestLOD = (uint32_t)round(lod);
    614         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, nearestLOD);
    615     }
    616 
    617     if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
    618         uint32_t lod0 = (uint32_t)floor(lod);
    619         uint32_t lod1 = (uint32_t)ceil(lod);
    620         uint32_t maxLOD = type->mHal.state.lodCount - 1;
    621         lod0 = min(lod0, maxLOD);
    622         lod1 = min(lod1, maxLOD);
    623         float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod0);
    624         float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod1);
    625         float frac = lod - (float)lod0;
    626         return sample0 * (1.0f - frac) + sample1 * frac;
    627     }
    628 
    629     return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
    630 }
    631 
    632 extern float4 __attribute__((overloadable))
    633         rsSample(rs_allocation a, rs_sampler s, float2 uv) {
    634 
    635     const Allocation_t *alloc = (const Allocation_t *)a.p;
    636     const Sampler_t *prog = (Sampler_t *)s.p;
    637     const Type_t *type = (Type_t *)alloc->mHal.state.type;
    638     const Element_t *elem = type->mHal.state.element;
    639     rs_data_kind dk = elem->mHal.state.dataKind;
    640     rs_data_type dt = elem->mHal.state.dataType;
    641     rs_sampler_value wrapS = prog->mHal.state.wrapS;
    642     rs_sampler_value wrapT = prog->mHal.state.wrapT;
    643 
    644     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
    645         return 0.f;
    646     }
    647 
    648     if (prog->mHal.state.magFilter == RS_SAMPLER_NEAREST) {
    649         return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
    650     }
    651     return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
    652 }
    653