//===-- SPUMathInst.td - Cell SPU math operations ---------*- tablegen -*--===//
//
// Cell SPU math operations
//
// This target description file contains instruction sequences for various
// math operations, such as vector multiplies, i32 multiply, etc., for the
// SPU's i32, i16, i8 and corresponding vector types.
//
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
// purely and completely coincidental.
//===----------------------------------------------------------------------===//

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v16i8 multiply instruction sequence:
//
// The selected DAG is the OR of two word-sized terms:
//   * term 1: a byte-merged product of $rA and $rB, masked with 0x0000ffff;
//   * term 2: a byte-merged product of the operands after a 16-bit arithmetic
//             right rotate/shift (ROTMAI), shifted left by 16.
// Each byte-merged product is a SELB between a halfword multiply and a second
// halfword multiply of the operands moved right by 8, itself shifted left by 8
// (SHLHI); the FSMBI 0x2222 immediate supplies the select mask.
// NOTE(review): the exact byte lanes picked by the 0x2222 mask depend on the
// FSMBI/SELB definitions, which live outside this file -- confirm there.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
          (ORv4i32
            // Term 1: products kept in the low 16 bits of every word.
            (ANDv4i32
              (SELBv4i32
                (MPYv8i16 VECREG:$rA, VECREG:$rB),
                (SHLHIv8i16
                  (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
                            (ROTMAHIv8i16 VECREG:$rB, 8)),
                  8),
                (FSMBIv8i16 0x2222)),
              (ILAv4i32 0x0000ffff)),
            // Term 2: same merge on the operands moved right by 16 (and by 8
            // for the second multiply), then shifted up by 16.
            (SHLIv4i32
              (SELBv4i32
                (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
                          (ROTMAIv4i32_i32 VECREG:$rB, 16)),
                (SHLHIv8i16
                  (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
                            (ROTMAIv4i32_i32 VECREG:$rB, 8)),
                  8),
                (FSMBIv8i16 0x2222)),
              16))>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v8i16 multiply instruction sequence:
//
// SELB merges two products under the FSMBI 0xcccc mask:
//   * MPY   of $rA and $rB, and
//   * MPYHH of $rA and $rB, shifted left by 16 within each word.
// NOTE(review): presumably MPY covers the low halfword of each word and
// MPYHH the high halfword -- verify against the instruction definitions.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
          (SELBv8i16
            (MPYv8i16 VECREG:$rA, VECREG:$rB),
            (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
            (FSMBIv8i16 0xcccc))>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v4i32, i32 multiply instruction sequence:
//
// Both patterns sum three partial products:
//   MPYH(rA, rB) + MPYH(rB, rA) + MPYU(rA, rB)
// The two MPYH terms differ only in operand order.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Vector (v4i32) form; every intermediate result is explicitly typed v4i32.
def MPYv4i32:
  Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
      (Av4i32
        (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
                       (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
        (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;

// Scalar (i32) form operating on the R32C register class.
def MPYi32:
  Pat<(mul R32C:$rA, R32C:$rB),
      (Ar32
        (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
              (MPYHr32 R32C:$rB, R32C:$rA)),
        (MPYUr32 R32C:$rA, R32C:$rB))>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f32, v4f32 divide instruction sequence:
//
// The quotient $rA / $rB is assembled from reusable code fragments:
//   Interp  : reciprocal estimate of $rB (FREST) refined by FI
//   DivEst  : $rA multiplied by Interp
//   NRaph   : one Newton-Raphson step applied to DivEst
//   Epsilon : NRaph with 1 added through AI
// The final pattern selects between NRaph and Epsilon.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// ---- Scalar f32 -------------------------------------------------------

// Reciprocal estimate of $rB followed by interpolation.
def Interpf32:
  CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;

// Initial quotient estimate: $rA times the interpolated reciprocal.
def DivEstf32:
  CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;

// Newton-Raphson refinement: FNMS(DivEst, $rB, $rA) is fed to FMA together
// with the reciprocal and the initial estimate.
def NRaphf32:
  CodeFrag<(FMAf32
             (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
             Interpf32.Fragment,
             DivEstf32.Fragment)>;

// Epsilon addition: the refined quotient plus an immediate 1.
// NOTE(review): presumably a one-ulp bump of the float's bit pattern --
// confirm against the AIf32 definition.
def Epsilonf32:
  CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;

// Select NRaph or Epsilon; the condition is CGTI(..., -1) applied to
// FNMS($rB, Epsilon, $rA).
def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
          (SELBf32_cond
            NRaphf32.Fragment,
            Epsilonf32.Fragment,
            (CGTIf32
              (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA),
              -1))>;

// ---- Vector v4f32 -----------------------------------------------------

// Reciprocal estimate of $rB followed by interpolation.
def Interpv4f32:
  CodeFrag<(FIv4f32 (v4f32 VECREG:$rB),
                    (FRESTv4f32 (v4f32 VECREG:$rB)))>;

// Initial quotient estimate: $rA times the interpolated reciprocal.
def DivEstv4f32:
  CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;

// Newton-Raphson refinement, same shape as the scalar fragment.
def NRaphv4f32:
  CodeFrag<(FMAv4f32
             (FNMSv4f32 DivEstv4f32.Fragment,
                        (v4f32 VECREG:$rB),
                        (v4f32 VECREG:$rA)),
             Interpv4f32.Fragment,
             DivEstv4f32.Fragment)>;

// Epsilon addition: the refined quotient plus an immediate 1.
def Epsilonv4f32:
  CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;

// Select NRaph or Epsilon; the condition is CGTI(..., -1) applied to
// FNMS($rB, Epsilon, $rA).
def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
          (SELBv4f32_cond
            NRaphv4f32.Fragment,
            Epsilonv4f32.Fragment,
            (CGTIv4f32
              (FNMSv4f32 (v4f32 VECREG:$rB),
                         Epsilonv4f32.Fragment,
                         (v4f32 VECREG:$rA)),
              -1))>;