Home | History | Annotate | Download | only in glsl
      1 /* Compile with:
      2  *
      3  * glsl_compiler --version 400 --dump-builder int64.glsl > builtin_int64.h
      4  *
      5  * Version 4.00+ is required for umulExtended.
      6  */
      7 #version 400
      8 #extension GL_ARB_gpu_shader_int64: require
      9 #extension GL_ARB_shading_language_420pack: require
     10 
     11 uvec2
     12 umul64(uvec2 a, uvec2 b)
     13 {
     14    uvec2 result;
     15 
     16    umulExtended(a.x, b.x, result.y, result.x);
     17    result.y += a.x * b.y + a.y * b.x;
     18 
     19    return result;
     20 }
     21 
     22 ivec2
     23 sign64(ivec2 a)
     24 {
     25    ivec2 result;
     26 
     27    result.y = a.y >> 31;
     28    result.x = result.y | int((a.x | a.y) != 0);
     29 
     30    return result;
     31 }
     32 
     33 uvec4
     34 udivmod64(uvec2 n, uvec2 d)
     35 {
     36    uvec2 quot = uvec2(0U, 0U);
     37    int log2_denom = findMSB(d.y) + 32;
     38 
     39    /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
     40     * greater than 32 bits to occur.  If the upper 32 bits of the numerator
     41     * are zero, it is impossible for (denom << [63, 32]) <= numer unless
     42     * denom == 0.
     43     */
     44    if (d.y == 0 && n.y >= d.x) {
     45       log2_denom = findMSB(d.x);
     46 
     47       /* Since the upper 32 bits of denom are zero, log2_denom <= 31 and we
     48        * don't have to compare log2_denom inside the loop as is done in the
     49        * general case (below).
     50        */
     51       for (int i = 31; i >= 1; i--) {
     52 	 if (log2_denom <= 31 - i && (d.x << i) <= n.y) {
     53 	    n.y -= d.x << i;
     54 	    quot.y |= 1U << i;
     55 	 }
     56       }
     57 
     58       /* log2_denom is always <= 31, so manually peel the last loop
     59        * iteration.
     60        */
     61       if (d.x <= n.y) {
     62 	 n.y -= d.x;
     63 	 quot.y |= 1U;
     64       }
     65    }
     66 
     67    uint64_t d64 = packUint2x32(d);
     68    uint64_t n64 = packUint2x32(n);
     69    for (int i = 31; i >= 1; i--) {
     70       if (log2_denom <= 63 - i && (d64 << i) <= n64) {
     71 	 n64 -= d64 << i;
     72 	 quot.x |= 1U << i;
     73       }
     74    }
     75 
     76    /* log2_denom is always <= 63, so manually peel the last loop
     77     * iteration.
     78     */
     79    if (d64 <= n64) {
     80       n64 -= d64;
     81       quot.x |= 1U;
     82    }
     83 
     84    return uvec4(quot, unpackUint2x32(n64));
     85 }
     86 
     87 uvec2
     88 udiv64(uvec2 n, uvec2 d)
     89 {
     90    return udivmod64(n, d).xy;
     91 }
     92 
     93 ivec2
     94 idiv64(ivec2 _n, ivec2 _d)
     95 {
     96    const bool negate = (_n.y < 0) != (_d.y < 0);
     97    uvec2 n = unpackUint2x32(uint64_t(abs(packInt2x32(_n))));
     98    uvec2 d = unpackUint2x32(uint64_t(abs(packInt2x32(_d))));
     99 
    100    uvec2 quot = udivmod64(n, d).xy;
    101 
    102    return negate ? unpackInt2x32(-int64_t(packUint2x32(quot))) : ivec2(quot);
    103 }
    104 
    105 uvec2
    106 umod64(uvec2 n, uvec2 d)
    107 {
    108    return udivmod64(n, d).zw;
    109 }
    110 
    111 ivec2
    112 imod64(ivec2 _n, ivec2 _d)
    113 {
    114    const bool negate = (_n.y < 0) != (_d.y < 0);
    115    uvec2 n = unpackUint2x32(uint64_t(abs(packInt2x32(_n))));
    116    uvec2 d = unpackUint2x32(uint64_t(abs(packInt2x32(_d))));
    117 
    118    uvec2 rem = udivmod64(n, d).zw;
    119 
    120    return negate ? unpackInt2x32(-int64_t(packUint2x32(rem))) : ivec2(rem);
    121 }
    122