Home | History | Annotate | Download | only in CellSPU
      1 //======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
      2 //
      3 //                     Cell SPU math operations
      4 //
      5 // This target description file contains instruction sequences for various
      6 // math operations, such as vector multiplies, i32 multiply, etc., for the
      7 // SPU's i32, i16, i8 and corresponding vector types.
      8 //
      9 // Any resemblance to libsimdmath or the Cell SDK simdmath library is
     10 // purely and completely coincidental.
     11 //===----------------------------------------------------------------------===//
     12 
     13 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
     14 // v16i8 multiply instruction sequence:
     15 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
     16 
        // Each 32-bit word holds four i8 lanes (byte 0 is the most significant).
        // Products are formed with 16-bit multiplies (MPYv8i16); only the low
        // 8 bits of each product are kept, and those depend solely on the low
        // 8 bits of the multiplied halfwords.
        //
        //   First OR operand (bytes 2 and 3 of every word):
        //     byte 3: MPYv8i16 of the unshifted inputs.
        //     byte 2: inputs shifted right by 8 within each halfword
        //             (ROTMAHIv8i16), multiplied, then moved back up with
        //             SHLHIv8i16 ..., 8.
        //     FSMBIv8i16 0x2222 is the SELB mask that takes byte 2 of every
        //     word from the second SELB operand; the AND with
        //     ILAv4i32 0x0000ffff clears bytes 0-1.
        //
        //   Second OR operand (bytes 0 and 1 of every word):
        //     byte 1: inputs shifted right by 16 so byte 1 is the low byte.
        //     byte 0: inputs shifted right by 24 so byte 0 is the low byte.
        //             (A shift of 8 here would leave byte 2 as the low byte
        //             and produce a[2]*b[2] in lane 0.)
        //     The merged halfword is moved into the upper half of the word by
        //     SHLIv4i32 ..., 16.
        //
        // NOTE(review): this lane analysis assumes the SPU ISA semantics of
        // mpy, rotmai, rotmahi, shlhi, selb and fsmbi; the instruction
        // definitions are not in this file -- confirm against them.
     17 def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
     18           (ORv4i32
     19            (ANDv4i32
     20             (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
     21                        (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
     22                                              (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
     23                        (FSMBIv8i16 0x2222)),
     24             (ILAv4i32 0x0000ffff)),
     25            (SHLIv4i32
     26             (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
     27                                  (ROTMAIv4i32_i32 VECREG:$rB, 16)),
     28                        (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 24),
     29                                              (ROTMAIv4i32_i32 VECREG:$rB, 24)), 8),
     30                        (FSMBIv8i16 0x2222)), 16))>;
     31                         
     32 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
     33 // v8i16 multiply instruction sequence:
     34 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
     35 
        // Builds the v8i16 product from two partial results merged by SELBv8i16:
        //   - (MPYv8i16 rA, rB), and
        //   - (MPYHHv8i16 rA, rB) shifted left by 16 with SHLIv4i32.
        // (FSMBIv8i16 0xcccc) supplies the select mask.
        // NOTE(review): presumably MPY multiplies the low halfwords and MPYHH
        // the high halfwords of each word, and 0xcccc selects the upper two
        // bytes of every word from the shifted MPYHH result -- confirm against
        // the SPU ISA.
     36 def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
     37           (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
     38                      (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
     39                      (FSMBIv8i16 0xcccc))>;
     40                  
     41 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
     42 // v4i32, i32 multiply instruction sequence:
     43 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
     44 
        // v4i32 multiply: the result is the sum (two nested Av4i32) of three
        // partial products:
        //   MPYHv4i32(rA, rB) + MPYHv4i32(rB, rA) + MPYUv4i32(rA, rB)
        // NOTE(review): presumably MPYH yields (high16(x) * low16(y)) << 16 and
        // MPYU the unsigned low16 * low16 product, so the sum is the low 32
        // bits of rA * rB -- confirm against the SPU ISA.
     45 def MPYv4i32:
     46   Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
     47       (Av4i32
     48         (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
     49                        (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
     50         (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
     51 
     52 def MPYi32:
        // Scalar (R32C) multiply: the result is the sum (two nested Ar32) of
        // three partial products:
        //   MPYHr32(rA, rB) + MPYHr32(rB, rA) + MPYUr32(rA, rB)
        // NOTE(review): presumably MPYH yields (high16(x) * low16(y)) << 16 and
        // MPYU the unsigned low16 * low16 product, so the sum is the low 32
        // bits of rA * rB -- confirm against the SPU ISA.
     53   Pat<(mul R32C:$rA, R32C:$rB),
     54       (Ar32
     55         (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
     56               (MPYHr32 R32C:$rB, R32C:$rA)),
     57         (MPYUr32 R32C:$rA, R32C:$rB))>;
     58 
     59 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
     60 // f32, v4f32 divide instruction sequence:
     61 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
     62 
        // The CodeFrag definitions below name sub-expressions of the f32 divide
        // (rA / rB); each later definition embeds the earlier ones through
        // their `.Fragment` member.
        //
     63 // Reciprocal estimate and interpolation
        // FIf32 applied to rB and the FRESTf32 estimate of rB.
     64 def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
     65 // Division estimate
        // FMf32 of rA with the interpolated reciprocal.
     66 def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
     67 // Newton-Raphson iteration
        // Operands: FMAf32(FNMSf32(DivEst, rB, rA), Interp, DivEst).
        // NOTE(review): presumably FNMS(x, y, z) = z - x*y and
        // FMA(x, y, z) = x*y + z, giving DivEst + (rA - DivEst*rB) * Interp --
        // confirm against the SPU ISA.
     68 def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
     69                                Interpf32.Fragment,
     70                                DivEstf32.Fragment)>;
     71 // Epsilon addition
        // AIf32 with immediate 1 applied to the Newton-Raphson result.
        // NOTE(review): presumably an integer add on the float's bit pattern
        // (next representable value) -- confirm.
     72 def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
     73 
        // fdiv lowering: SELBf32_cond chooses between the Newton-Raphson result
        // and the epsilon-adjusted result; the condition operand is
        // CGTIf32(FNMSf32(rB, Epsilon, rA), -1).
        // NOTE(review): presumably the adjusted result is taken when
        // rA - rB*Epsilon is non-negative (integer "> -1" on the bit pattern)
        // -- confirm operand roles of SELBf32_cond.
     74 def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
     75           (SELBf32_cond NRaphf32.Fragment,
     76                         Epsilonf32.Fragment,
     77                         (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
     78 
     79 // Reciprocal estimate and interpolation
        // Vector (v4f32) divide fragments for rA / rB; each later definition
        // embeds the earlier ones through their `.Fragment` member.
        // FIv4f32 applied to rB and the FRESTv4f32 estimate of rB.
     80 def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
     81 // Division estimate
        // FMv4f32 of rA with the interpolated reciprocal.
     82 def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
     83 // Newton-Raphson iteration
        // Operands: FMAv4f32(FNMSv4f32(DivEst, rB, rA), Interp, DivEst).
        // NOTE(review): presumably FNMS(x, y, z) = z - x*y and
        // FMA(x, y, z) = x*y + z, giving DivEst + (rA - DivEst*rB) * Interp --
        // confirm against the SPU ISA.
     84 def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
     85                                               (v4f32 VECREG:$rB),
     86                                               (v4f32 VECREG:$rA)),
     87                                    Interpv4f32.Fragment,
     88                                    DivEstv4f32.Fragment)>;
     89 // Epsilon addition
        // AIv4f32 with immediate 1 applied to the Newton-Raphson result.
        // NOTE(review): presumably an integer add on each element's bit
        // pattern (next representable value) -- confirm.
     90 def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
     91 
        // fdiv lowering: SELBv4f32_cond chooses between the Newton-Raphson
        // result and the epsilon-adjusted result; the condition operand is
        // CGTIv4f32(FNMSv4f32(rB, Epsilon, rA), -1).
        // NOTE(review): presumably the adjusted result is taken in each element
        // where rA - rB*Epsilon is non-negative (integer "> -1" on the bit
        // pattern) -- confirm operand roles of SELBv4f32_cond.
     92 def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
     93           (SELBv4f32_cond NRaphv4f32.Fragment,
     94                         Epsilonv4f32.Fragment,
     95                         (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
     96                                               Epsilonv4f32.Fragment,
     97                                               (v4f32 VECREG:$rA)), -1))>;
     98