Home | History | Annotate | Download | only in runtime
      1 #include "rs_core.rsh"
      2 #include "rs_graphics.rsh"
      3 #include "rs_structs.h"
      4 
      5 
      6 // 565 Conversion bits taken from SkBitmap
      7 #define SK_R16_BITS     5
      8 #define SK_G16_BITS     6
      9 #define SK_B16_BITS     5
     10 
     11 #define SK_R16_SHIFT    (SK_B16_BITS + SK_G16_BITS)
     12 #define SK_G16_SHIFT    (SK_B16_BITS)
     13 #define SK_B16_SHIFT    0
     14 
     15 #define SK_R16_MASK     ((1 << SK_R16_BITS) - 1)
     16 #define SK_G16_MASK     ((1 << SK_G16_BITS) - 1)
     17 #define SK_B16_MASK     ((1 << SK_B16_BITS) - 1)
     18 
     19 #define SkGetPackedR16(color)   (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
     20 #define SkGetPackedG16(color)   (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
     21 #define SkGetPackedB16(color)   (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
     22 
     23 static inline unsigned SkR16ToR32(unsigned r) {
     24     return (r << (8 - SK_R16_BITS)) | (r >> (2 * SK_R16_BITS - 8));
     25 }
     26 
     27 static inline unsigned SkG16ToG32(unsigned g) {
     28     return (g << (8 - SK_G16_BITS)) | (g >> (2 * SK_G16_BITS - 8));
     29 }
     30 
     31 static inline unsigned SkB16ToB32(unsigned b) {
     32     return (b << (8 - SK_B16_BITS)) | (b >> (2 * SK_B16_BITS - 8));
     33 }
     34 
     35 #define SkPacked16ToR32(c)      SkR16ToR32(SkGetPackedR16(c))
     36 #define SkPacked16ToG32(c)      SkG16ToG32(SkGetPackedG16(c))
     37 #define SkPacked16ToB32(c)      SkB16ToB32(SkGetPackedB16(c))
     38 
     39 static float3 getFrom565(uint16_t color) {
     40     float3 result;
     41     result.x = (float)SkPacked16ToR32(color);
     42     result.y = (float)SkPacked16ToG32(color);
     43     result.z = (float)SkPacked16ToB32(color);
     44     return result;
     45 }
     46 
     47 /**
     48 * Allocation sampling
     49 */
     50 static inline float __attribute__((overloadable))
     51         getElementAt1(const uint8_t *p, int32_t x) {
     52     float r = p[x];
     53     return r;
     54 }
     55 
     56 static inline float2 __attribute__((overloadable))
     57         getElementAt2(const uint8_t *p, int32_t x) {
     58     x *= 2;
     59     float2 r = {p[x], p[x+1]};
     60     return r;
     61 }
     62 
     63 static inline float3 __attribute__((overloadable))
     64         getElementAt3(const uint8_t *p, int32_t x) {
     65     x *= 4;
     66     float3 r = {p[x], p[x+1], p[x+2]};
     67     return r;
     68 }
     69 
     70 static inline float4 __attribute__((overloadable))
     71         getElementAt4(const uint8_t *p, int32_t x) {
     72     x *= 4;
     73     const uchar4 *p2 = (const uchar4 *)&p[x];
     74     return convert_float4(p2[0]);
     75 }
     76 
     77 static inline float3 __attribute__((overloadable))
     78         getElementAt565(const uint8_t *p, int32_t x) {
     79     x *= 2;
     80     float3 r = getFrom565(((const uint16_t *)p)[0]);
     81     return r;
     82 }
     83 
     84 static inline float __attribute__((overloadable))
     85         getElementAt1(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
     86     p += y * stride;
     87     float r = p[x];
     88     return r;
     89 }
     90 
     91 static inline float2 __attribute__((overloadable))
     92         getElementAt2(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
     93     p += y * stride;
     94     x *= 2;
     95     float2 r = {p[x], p[x+1]};
     96     return r;
     97 }
     98 
     99 static inline float3 __attribute__((overloadable))
    100         getElementAt3(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    101     p += y * stride;
    102     x *= 4;
    103     float3 r = {p[x], p[x+1], p[x+2]};
    104     return r;
    105 }
    106 
    107 static inline float4 __attribute__((overloadable))
    108         getElementAt4(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    109     p += y * stride;
    110     x *= 4;
    111     float4 r = {p[x], p[x+1], p[x+2], p[x+3]};
    112     return r;
    113 }
    114 
    115 static inline float3 __attribute__((overloadable))
    116         getElementAt565(const uint8_t *p, size_t stride, int32_t x, int32_t y) {
    117     p += y * stride;
    118     x *= 2;
    119     float3 r = getFrom565(((const uint16_t *)p)[0]);
    120     return r;
    121 }
    122 
    123 
    124 
    125 
    126 
    127 static float4 __attribute__((overloadable))
    128             getSample_A(const uint8_t *p, int32_t iPixel,
    129                           int32_t next, float w0, float w1) {
    130     float p0 = getElementAt1(p, iPixel);
    131     float p1 = getElementAt1(p, next);
    132     float r = p0 * w0 + p1 * w1;
    133     r *= (1.f / 255.f);
    134     float4 ret = {0.f, 0.f, 0.f, r};
    135     return ret;
    136 }
    137 static float4 __attribute__((overloadable))
    138             getSample_L(const uint8_t *p, int32_t iPixel,
    139                           int32_t next, float w0, float w1) {
    140     float p0 = getElementAt1(p, iPixel);
    141     float p1 = getElementAt1(p, next);
    142     float r = p0 * w0 + p1 * w1;
    143     r *= (1.f / 255.f);
    144     float4 ret = {r, r, r, 1.f};
    145     return ret;
    146 }
    147 static float4 __attribute__((overloadable))
    148             getSample_LA(const uint8_t *p, int32_t iPixel,
    149                            int32_t next, float w0, float w1) {
    150     float2 p0 = getElementAt2(p, iPixel);
    151     float2 p1 = getElementAt2(p, next);
    152     float2 r = p0 * w0 + p1 * w1;
    153     r *= (1.f / 255.f);
    154     float4 ret = {r.x, r.x, r.x, r.y};
    155     return ret;
    156 }
    157 static float4 __attribute__((overloadable))
    158             getSample_RGB(const uint8_t *p, int32_t iPixel,
    159                             int32_t next, float w0, float w1) {
    160     float3 p0 = getElementAt3(p, iPixel);
    161     float3 p1 = getElementAt3(p, next);
    162     float3 r = p0 * w0 + p1 * w1;
    163     r *= (1.f / 255.f);
    164     float4 ret = {r.x, r.x, r.z, 1.f};
    165     return ret;
    166 }
    167 static float4 __attribute__((overloadable))
    168             getSample_565(const uint8_t *p, int32_t iPixel,
    169                            int32_t next, float w0, float w1) {
    170     float3 p0 = getElementAt565(p, iPixel);
    171     float3 p1 = getElementAt565(p, next);
    172     float3 r = p0 * w0 + p1 * w1;
    173     r *= (1.f / 255.f);
    174     float4 ret = {r.x, r.x, r.z, 1.f};
    175     return ret;
    176 }
    177 static float4 __attribute__((overloadable))
    178             getSample_RGBA(const uint8_t *p, int32_t iPixel,
    179                              int32_t next, float w0, float w1) {
    180     float4 p0 = getElementAt4(p, iPixel);
    181     float4 p1 = getElementAt4(p, next);
    182     float4 r = p0 * w0 + p1 * w1;
    183     r *= (1.f / 255.f);
    184     return r;
    185 }
    186 
    187 
    188 static float4 __attribute__((overloadable))
    189             getSample_A(const uint8_t *p, size_t stride,
    190                           int locX, int locY, int nextX, int nextY,
    191                           float w0, float w1, float w2, float w3) {
    192     float p0 = getElementAt1(p, stride, locX, locY);
    193     float p1 = getElementAt1(p, stride, nextX, locY);
    194     float p2 = getElementAt1(p, stride, locX, nextY);
    195     float p3 = getElementAt1(p, stride, nextX, nextY);
    196     float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
    197     r *= (1.f / 255.f);
    198     float4 ret = {0.f, 0.f, 0.f, r};
    199     return ret;
    200 }
    201 static float4 __attribute__((overloadable))
    202             getSample_L(const uint8_t *p, size_t stride,
    203                          int locX, int locY, int nextX, int nextY,
    204                          float w0, float w1, float w2, float w3) {
    205     float p0 = getElementAt1(p, stride, locX, locY);
    206     float p1 = getElementAt1(p, stride, nextX, locY);
    207     float p2 = getElementAt1(p, stride, locX, nextY);
    208     float p3 = getElementAt1(p, stride, nextX, nextY);
    209     float r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
    210     r *= (1.f / 255.f);
    211     float4 ret = {r, r, r, 1.f};
    212     return ret;
    213 }
    214 static float4 __attribute__((overloadable))
    215             getSample_LA(const uint8_t *p, size_t stride,
    216                          int locX, int locY, int nextX, int nextY,
    217                          float w0, float w1, float w2, float w3) {
    218     float2 p0 = getElementAt2(p, stride, locX, locY);
    219     float2 p1 = getElementAt2(p, stride, nextX, locY);
    220     float2 p2 = getElementAt2(p, stride, locX, nextY);
    221     float2 p3 = getElementAt2(p, stride, nextX, nextY);
    222     float2 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
    223     r *= (1.f / 255.f);
    224     float4 ret = {r.x, r.x, r.x, r.y};
    225     return ret;
    226 }
    227 static float4 __attribute__((overloadable))
    228             getSample_RGB(const uint8_t *p, size_t stride,
    229                          int locX, int locY, int nextX, int nextY,
    230                          float w0, float w1, float w2, float w3) {
    231     float4 p0 = getElementAt4(p, stride, locX, locY);
    232     float4 p1 = getElementAt4(p, stride, nextX, locY);
    233     float4 p2 = getElementAt4(p, stride, locX, nextY);
    234     float4 p3 = getElementAt4(p, stride, nextX, nextY);
    235     float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
    236     r *= (1.f / 255.f);
    237     float4 ret = {r.x, r.y, r.z, 1.f};
    238     return ret;
    239 }
    240 static float4 __attribute__((overloadable))
    241             getSample_RGBA(const uint8_t *p, size_t stride,
    242                          int locX, int locY, int nextX, int nextY,
    243                          float w0, float w1, float w2, float w3) {
    244     float4 p0 = getElementAt4(p, stride, locX, locY);
    245     float4 p1 = getElementAt4(p, stride, nextX, locY);
    246     float4 p2 = getElementAt4(p, stride, locX, nextY);
    247     float4 p3 = getElementAt4(p, stride, nextX, nextY);
    248     float4 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
    249     r *= (1.f / 255.f);
    250     return r;
    251 }
    252 static float4 __attribute__((overloadable))
    253             getSample_565(const uint8_t *p, size_t stride,
    254                          int locX, int locY, int nextX, int nextY,
    255                          float w0, float w1, float w2, float w3) {
    256     float3 p0 = getElementAt565(p, stride, locX, locY);
    257     float3 p1 = getElementAt565(p, stride, nextX, locY);
    258     float3 p2 = getElementAt565(p, stride, locX, nextY);
    259     float3 p3 = getElementAt565(p, stride, nextX, nextY);
    260     float3 r = p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
    261     r *= (1.f / 255.f);
    262     float4 ret;
    263     ret.rgb = r;
    264     ret.w = 1.f;
    265     return ret;
    266 }
    267 
    268 static float4 __attribute__((overloadable))
    269         getBilinearSample1D(const Allocation_t *alloc, float2 weights,
    270                           uint32_t iPixel, uint32_t next,
    271                           rs_data_kind dk, rs_data_type dt, uint32_t lod) {
    272 
    273      const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
    274 
    275      switch(dk) {
    276      case RS_KIND_PIXEL_RGBA:
    277          return getSample_RGBA(p, iPixel, next, weights.x, weights.y);
    278      case RS_KIND_PIXEL_A:
    279          return getSample_A(p, iPixel, next, weights.x, weights.y);
    280      case RS_KIND_PIXEL_RGB:
    281          if (dt == RS_TYPE_UNSIGNED_5_6_5) {
    282              return getSample_565(p, iPixel, next, weights.x, weights.y);
    283          }
    284          return getSample_RGB(p, iPixel, next, weights.x, weights.y);
    285      case RS_KIND_PIXEL_L:
    286          return getSample_L(p, iPixel, next, weights.x, weights.y);
    287      case RS_KIND_PIXEL_LA:
    288          return getSample_LA(p, iPixel, next, weights.x, weights.y);
    289 
    290      default:
    291          //__builtin_unreachable();
    292          break;
    293      }
    294 
    295      //__builtin_unreachable();
    296      return 0.f;
    297 }
    298 
    299 static uint32_t wrapI(rs_sampler_value wrap, int32_t coord, int32_t size) {
    300     if (wrap == RS_SAMPLER_WRAP) {
    301         coord = coord % size;
    302         if (coord < 0) {
    303             coord += size;
    304         }
    305     }
    306     if (wrap == RS_SAMPLER_MIRRORED_REPEAT) {
    307         coord = coord % (size * 2);
    308         if (coord < 0) {
    309             coord = (size * 2) + coord;
    310         }
    311         if (coord >= size) {
    312             coord = (size * 2) - coord;
    313         }
    314     }
    315     return (uint32_t)max(0, min(coord, size - 1));
    316 }
    317 
    318 static float4 __attribute__((overloadable))
    319         getBilinearSample2D(const Allocation_t *alloc, float w0, float w1, float w2, float w3,
    320                           int lx, int ly, int nx, int ny,
    321                           rs_data_kind dk, rs_data_type dt, uint32_t lod) {
    322 
    323     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
    324     size_t stride = alloc->mHal.drvState.lod[lod].stride;
    325 
    326     switch(dk) {
    327     case RS_KIND_PIXEL_RGBA:
    328         return getSample_RGBA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
    329     case RS_KIND_PIXEL_A:
    330         return getSample_A(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
    331     case RS_KIND_PIXEL_LA:
    332         return getSample_LA(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
    333     case RS_KIND_PIXEL_RGB:
    334         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
    335             return getSample_565(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
    336         }
    337         return getSample_RGB(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
    338     case RS_KIND_PIXEL_L:
    339         return getSample_L(p, stride, lx, ly, nx, ny, w0, w1, w2, w3);
    340 
    341     default:
    342         break;
    343     }
    344 
    345     return 0.f;
    346 }
    347 
    348 static float4  __attribute__((overloadable))
    349         getNearestSample(const Allocation_t *alloc, uint32_t iPixel, rs_data_kind dk,
    350                          rs_data_type dt, uint32_t lod) {
    351 
    352     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
    353 
    354     float4 result = {0.f, 0.f, 0.f, 255.f};
    355 
    356     switch(dk) {
    357     case RS_KIND_PIXEL_RGBA:
    358         result = getElementAt4(p, iPixel);
    359         break;
    360     case RS_KIND_PIXEL_A:
    361         result.w = getElementAt1(p, iPixel);
    362         break;
    363     case RS_KIND_PIXEL_LA:
    364         result.zw = getElementAt2(p, iPixel);
    365         result.xy = result.z;
    366         break;
    367     case RS_KIND_PIXEL_RGB:
    368         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
    369             result.xyz = getElementAt565(p, iPixel);
    370         } else {
    371             result.xyz = getElementAt3(p, iPixel);
    372         }
    373         break;
    374     case RS_KIND_PIXEL_L:
    375         result.xyz = getElementAt1(p, iPixel);
    376 
    377     default:
    378         //__builtin_unreachable();
    379         break;
    380     }
    381 
    382     return result * 0.003921569f;
    383 }
    384 
    385 static float4  __attribute__((overloadable))
    386         getNearestSample(const Allocation_t *alloc, uint2 iPixel, rs_data_kind dk,
    387                          rs_data_type dt, uint32_t lod) {
    388 
    389     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
    390     size_t stride = alloc->mHal.drvState.lod[lod].stride;
    391 
    392     float4 result = {0.f, 0.f, 0.f, 255.f};
    393 
    394     switch(dk) {
    395     case RS_KIND_PIXEL_RGBA:
    396         result = getElementAt4(p, stride, iPixel.x, iPixel.y);
    397         break;
    398     case RS_KIND_PIXEL_A:
    399         result.w = getElementAt1(p, stride, iPixel.x, iPixel.y);
    400         break;
    401     case RS_KIND_PIXEL_LA:
    402         result.zw = getElementAt2(p, stride, iPixel.x, iPixel.y);
    403         result.xy = result.z;
    404         break;
    405     case RS_KIND_PIXEL_RGB:
    406         if (dt == RS_TYPE_UNSIGNED_5_6_5) {
    407             result.xyz = getElementAt565(p, stride, iPixel.x, iPixel.y);
    408         } else {
    409             result.xyz = getElementAt3(p, stride, iPixel.x, iPixel.y);
    410         }
    411         break;
    412 
    413     default:
    414         //__builtin_unreachable();
    415         break;
    416     }
    417 
    418     return result * 0.003921569f;
    419 }
    420 
    421 static float4 __attribute__((overloadable))
    422         sample_LOD_LinearPixel(const Allocation_t *alloc,
    423                                rs_data_kind dk, rs_data_type dt,
    424                                rs_sampler_value wrapS,
    425                                float uv, uint32_t lod) {
    426 
    427     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
    428 
    429     int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
    430     float pixelUV = uv * (float)(sourceW);
    431     int32_t iPixel = (int32_t)(pixelUV);
    432     float frac = pixelUV - (float)iPixel;
    433 
    434     if (frac < 0.5f) {
    435         iPixel -= 1;
    436         frac += 0.5f;
    437     } else {
    438         frac -= 0.5f;
    439     }
    440 
    441     float oneMinusFrac = 1.0f - frac;
    442 
    443     float2 weights;
    444     weights.x = oneMinusFrac;
    445     weights.y = frac;
    446 
    447     uint32_t next = wrapI(wrapS, iPixel + 1, sourceW);
    448     uint32_t location = wrapI(wrapS, iPixel, sourceW);
    449 
    450     return getBilinearSample1D(alloc, weights, location, next, dk, dt, lod);
    451 }
    452 
    453 static float4 __attribute__((overloadable))
    454         sample_LOD_NearestPixel(const Allocation_t *alloc,
    455                                 rs_data_kind dk, rs_data_type dt,
    456                                 rs_sampler_value wrapS,
    457                                 float uv, uint32_t lod) {
    458 
    459     int32_t sourceW = alloc->mHal.drvState.lod[lod].dimX;
    460     int32_t iPixel = (int32_t)(uv * (float)(sourceW));
    461     uint32_t location = wrapI(wrapS, iPixel, sourceW);
    462 
    463     return getNearestSample(alloc, location, dk, dt, lod);
    464 }
    465 
    466 static float4 __attribute__((overloadable))
    467         sample_LOD_LinearPixel(const Allocation_t *alloc,
    468                                rs_data_kind dk, rs_data_type dt,
    469                                rs_sampler_value wrapS,
    470                                rs_sampler_value wrapT,
    471                                float2 uv, uint32_t lod) {
    472 
    473     const uint8_t *p = (const uint8_t *)alloc->mHal.drvState.lod[lod].mallocPtr;
    474 
    475     int sourceW = alloc->mHal.drvState.lod[lod].dimX;
    476     int sourceH = alloc->mHal.drvState.lod[lod].dimY;
    477 
    478     float pixelU = uv.x * sourceW;
    479     float pixelV = uv.y * sourceH;
    480     int iPixelU = pixelU;
    481     int iPixelV = pixelV;
    482     float fracU = pixelU - iPixelU;
    483     float fracV = pixelV - iPixelV;
    484 
    485     if (fracU < 0.5f) {
    486         iPixelU -= 1;
    487         fracU += 0.5f;
    488     } else {
    489         fracU -= 0.5f;
    490     }
    491     if (fracV < 0.5f) {
    492         iPixelV -= 1;
    493         fracV += 0.5f;
    494     } else {
    495         fracV -= 0.5f;
    496     }
    497     float oneMinusFracU = 1.0f - fracU;
    498     float oneMinusFracV = 1.0f - fracV;
    499 
    500     float w0 = oneMinusFracU * oneMinusFracV;
    501     float w1 = fracU * oneMinusFracV;
    502     float w2 = oneMinusFracU * fracV;
    503     float w3 = fracU * fracV;
    504 
    505     int nx = wrapI(wrapS, iPixelU + 1, sourceW);
    506     int ny = wrapI(wrapT, iPixelV + 1, sourceH);
    507     int lx = wrapI(wrapS, iPixelU, sourceW);
    508     int ly = wrapI(wrapT, iPixelV, sourceH);
    509 
    510     return getBilinearSample2D(alloc, w0, w1, w2, w3, lx, ly, nx, ny, dk, dt, lod);
    511 
    512 }
    513 
    514 static float4 __attribute__((overloadable))
    515         sample_LOD_NearestPixel(const Allocation_t *alloc,
    516                                 rs_data_kind dk, rs_data_type dt,
    517                                 rs_sampler_value wrapS,
    518                                 rs_sampler_value wrapT,
    519                                 float2 uv, uint32_t lod) {
    520     int sourceW = alloc->mHal.drvState.lod[lod].dimX;
    521     int sourceH = alloc->mHal.drvState.lod[lod].dimY;
    522 
    523     float2 dimF;
    524     dimF.x = (float)(sourceW);
    525     dimF.y = (float)(sourceH);
    526     int2 iPixel = convert_int2(uv * dimF);
    527 
    528     uint2 location;
    529     location.x = wrapI(wrapS, iPixel.x, sourceW);
    530     location.y = wrapI(wrapT, iPixel.y, sourceH);
    531     return getNearestSample(alloc, location, dk, dt, lod);
    532 }
    533 
    534 extern const float4 __attribute__((overloadable))
    535         rsSample(rs_allocation a, rs_sampler s, float uv, float lod) {
    536 
    537     const Allocation_t *alloc = (const Allocation_t *)a.p;
    538     const Sampler_t *prog = (Sampler_t *)s.p;
    539     const Type_t *type = (Type_t *)alloc->mHal.state.type;
    540     const Element_t *elem = type->mHal.state.element;
    541     rs_data_kind dk = elem->mHal.state.dataKind;
    542     rs_data_type dt = elem->mHal.state.dataType;
    543     rs_sampler_value sampleMin = prog->mHal.state.minFilter;
    544     rs_sampler_value sampleMag = prog->mHal.state.magFilter;
    545     rs_sampler_value wrapS = prog->mHal.state.wrapS;
    546 
    547     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
    548         return 0.f;
    549     }
    550 
    551     if (lod <= 0.0f) {
    552         if (sampleMag == RS_SAMPLER_NEAREST) {
    553             return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
    554         }
    555         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, 0);
    556     }
    557 
    558     if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
    559         uint32_t maxLOD = type->mHal.state.lodCount - 1;
    560         lod = min(lod, (float)maxLOD);
    561         uint32_t nearestLOD = (uint32_t)round(lod);
    562         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, nearestLOD);
    563     }
    564 
    565     if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
    566         uint32_t lod0 = (uint32_t)floor(lod);
    567         uint32_t lod1 = (uint32_t)ceil(lod);
    568         uint32_t maxLOD = type->mHal.state.lodCount - 1;
    569         lod0 = min(lod0, maxLOD);
    570         lod1 = min(lod1, maxLOD);
    571         float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod0);
    572         float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, uv, lod1);
    573         float frac = lod - (float)lod0;
    574         return sample0 * (1.0f - frac) + sample1 * frac;
    575     }
    576 
    577     return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, uv, 0);
    578 }
    579 
    580 extern const float4 __attribute__((overloadable))
    581         rsSample(rs_allocation a, rs_sampler s, float location) {
    582     return rsSample(a, s, location, 0);
    583 }
    584 
    585 
    586 extern const float4 __attribute__((overloadable))
    587         rsSample(rs_allocation a, rs_sampler s, float2 uv, float lod) {
    588 
    589     const Allocation_t *alloc = (const Allocation_t *)a.p;
    590     const Sampler_t *prog = (Sampler_t *)s.p;
    591     const Type_t *type = (Type_t *)alloc->mHal.state.type;
    592     const Element_t *elem = type->mHal.state.element;
    593     rs_data_kind dk = elem->mHal.state.dataKind;
    594     rs_data_type dt = elem->mHal.state.dataType;
    595     rs_sampler_value sampleMin = prog->mHal.state.minFilter;
    596     rs_sampler_value sampleMag = prog->mHal.state.magFilter;
    597     rs_sampler_value wrapS = prog->mHal.state.wrapS;
    598     rs_sampler_value wrapT = prog->mHal.state.wrapT;
    599 
    600     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
    601         return 0.f;
    602     }
    603 
    604     if (lod <= 0.0f) {
    605         if (sampleMag == RS_SAMPLER_NEAREST) {
    606             return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
    607         }
    608         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
    609     }
    610 
    611     if (sampleMin == RS_SAMPLER_LINEAR_MIP_NEAREST) {
    612         uint32_t maxLOD = type->mHal.state.lodCount - 1;
    613         lod = min(lod, (float)maxLOD);
    614         uint32_t nearestLOD = (uint32_t)round(lod);
    615         return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, nearestLOD);
    616     }
    617 
    618     if (sampleMin == RS_SAMPLER_LINEAR_MIP_LINEAR) {
    619         uint32_t lod0 = (uint32_t)floor(lod);
    620         uint32_t lod1 = (uint32_t)ceil(lod);
    621         uint32_t maxLOD = type->mHal.state.lodCount - 1;
    622         lod0 = min(lod0, maxLOD);
    623         lod1 = min(lod1, maxLOD);
    624         float4 sample0 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod0);
    625         float4 sample1 = sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, lod1);
    626         float frac = lod - (float)lod0;
    627         return sample0 * (1.0f - frac) + sample1 * frac;
    628     }
    629 
    630     return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
    631 }
    632 
    633 extern const float4 __attribute__((overloadable))
    634         rsSample(rs_allocation a, rs_sampler s, float2 uv) {
    635 
    636     const Allocation_t *alloc = (const Allocation_t *)a.p;
    637     const Sampler_t *prog = (Sampler_t *)s.p;
    638     const Type_t *type = (Type_t *)alloc->mHal.state.type;
    639     const Element_t *elem = type->mHal.state.element;
    640     rs_data_kind dk = elem->mHal.state.dataKind;
    641     rs_data_type dt = elem->mHal.state.dataType;
    642     rs_sampler_value wrapS = prog->mHal.state.wrapS;
    643     rs_sampler_value wrapT = prog->mHal.state.wrapT;
    644 
    645     if (!(alloc->mHal.state.usageFlags & RS_ALLOCATION_USAGE_GRAPHICS_TEXTURE)) {
    646         return 0.f;
    647     }
    648 
    649     if (prog->mHal.state.magFilter == RS_SAMPLER_NEAREST) {
    650         return sample_LOD_NearestPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
    651     }
    652     return sample_LOD_LinearPixel(alloc, dk, dt, wrapS, wrapT, uv, 0);
    653 }
    654 
    655