//====-- SPU64InstrInfo.td - Cell SPU 64-bit operations ---*- tablegen -*--===//
//
// Cell SPU 64-bit operations
//
//===----------------------------------------------------------------------===//

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// 64-bit comparisons:
//
// 1. The instruction sequences for vector versus scalar differ by a
//    constant. In the scalar case, we're only interested in the
//    top two 32-bit slots, whereas we're interested in an exact
//    all-four-slot match in the vector case.
//
// 2. There are no "immediate" forms, since loading 64-bit constants
//    could be a constant pool load.
//
// 3. i64 setcc results are i32, which are subsequently converted to a FSM
//    mask when used in a select pattern.
//
// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
//    [Note: this may be moot, since gb produces v4i32 or r32.]
//
// 5. The code sequences for r64 and v2i64 are probably overly conservative,
//    compared to the code that gcc produces.
//
// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// selb instruction definition for i64.
// Note that the selection mask operand of the i64 selb is a vector,
// produced by various forms of FSM. The instruction itself carries no
// selection pattern; it is only reached through the Pat<> records below.
def SELBr64_cond:
    SELBInst<(outs R64C:$rT),
             (ins R64C:$rA, R64C:$rB, VECREG:$rC),
             [/* no pattern */]>;

// The generic i64 select pattern. It assumes the comparison result lives in
// a 32-bit register that contains a select mask pattern (i.e., a gather bits
// result); FSMr32 turns that register into the vector mask operand.
//
// The select's second operand (the value chosen when the condition holds) is
// passed as selb's $rB and the third operand as $rA.
def : Pat<(select R32C:$rCond, R64C:$rTrue, R64C:$rFalse),
          (SELBr64_cond R64C:$rFalse, R64C:$rTrue, (FSMr32 R32C:$rCond))>;

// Select on the negated condition.
//   cond    - the setcc fragment being matched (e.g. setne).
//   compare - CodeFrag computing the *opposite* comparison on $rA/$rB; its
//             result feeds FSMr32, so the two value operands are passed to
//             selb in swapped roles relative to the generic pattern above.
class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
    Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
        (SELBr64_cond R64C:$rTrue,
                      R64C:$rFalse,
                      (FSMr32 compare.Fragment))>;

// Setcc on the negated condition: compute the opposite comparison and invert
// every bit of the i32 result (xor with -1).
class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
    Pat<(cond R64C:$rA, R64C:$rB),
        (XORIr32 compare.Fragment, -1)>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// The i64 seteq fragment that does the scalar->vector conversion and
// comparison. Only the top two word slots matter for a scalar, hence the
// "greater than 0xb" test on the gathered bits:
def CEQr64compare:
    CodeFrag<(CGTIv4i32
               (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                                  (COPY_TO_REGCLASS R64C:$rB, VECREG))),
               0xb)>;

// The i64 seteq fragment that does the vector comparison. All four word
// slots must match, hence the exact compare of the gathered bits with 0xf:
def CEQv2i64compare:
    CodeFrag<(CEQIv4i32
               (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)),
               0xf)>;

// i64 seteq (equality): the setcc result is i32, which is converted to a
// vector FSM mask when used in a select pattern.
//
// v2i64 seteq (equality): the setcc result is v4i32
multiclass CompareEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64:   CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;

  // SELB mask from FSM:
  def r64mask:   CodeFrag<(i32 (COPY_TO_REGCLASS
                                 (FSMv4i32 CEQr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                 (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
}

defm I64EQ: CompareEqual64;

def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;

// i64 setne:
def : I64SETCCNegCond<setne, I64EQr64>;
def : I64SELECTNegCond<setne, I64EQr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setugt/setule:
//
// Shape of every 64-bit ordered compare in this file (per doubleword):
//
//   selb(hi_lo_gt, xswd(low_word_result), hi_lo_eq)
//
// i.e. where the word-wise "equal" mask is set, take the sign-extended
// result of the low-word comparison; otherwise take the word-wise "greater"
// result. For a scalar only the preferred (high) word slot is consumed, so
// the net effect is:  a.hi == b.hi ? cmp(a.lo, b.lo) : a.hi > b.hi.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Word-wise unsigned a > b on the scalar operands.
def CLGTr64ugt:
    CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                        (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

// Word-wise a == b on the scalar operands.
def CLGTr64eq:
    CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                       (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

// Unsigned 64-bit a > b.
def CLGTr64compare:
    CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
                        (XSWDv2i64 CLGTr64ugt.Fragment),
                        CLGTr64eq.Fragment)>;

def CLGTv2i64ugt:
    CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;

def CLGTv2i64eq:
    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

// Vector form. The low-word term must come from the *vector* fragment; the
// r64 fragment wraps its operands in COPY_TO_REGCLASS from R64C and does not
// belong in a VECREG pattern.
def CLGTv2i64compare:
    CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
                        (XSWDv2i64 CLGTv2i64ugt.Fragment),
                        CLGTv2i64eq.Fragment)>;

multiclass CompareLogicalGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64:   CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask:   CodeFrag<(i32 (COPY_TO_REGCLASS
                                 (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                 (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
}

defm I64LGT: CompareLogicalGreaterThan64;

def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
//          I64LGTv2i64.Fragment>;

// i64 setule (negation of setugt):
def : I64SETCCNegCond<setule, I64LGTr64>;
def : I64SELECTNegCond<setule, I64LGTr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setuge/setult:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Unsigned 64-bit a >= b:  a.hi == b.hi ? a.lo >= b.lo : a.hi > b.hi.
//
// This must not be computed as "both words are >=": that rejects inputs such
// as a = 0x00000002_00000000, b = 0x00000001_00000005, where the high word
// alone already decides the comparison.
def CLGEr64compare:
    CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
                        (XSWDv2i64 (ORv4i32 CLGTr64ugt.Fragment,
                                            CLGTr64eq.Fragment)),
                        CLGTr64eq.Fragment)>;

// NOTE(review): the vector form is left as it was -- a single "all four
// words are >=" test (gb + ceqi 0xf), not a per-element 64-bit compare.
// Confirm that this is what users of the v2i64 setuge pattern expect.
def CLGEv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
                                          CLGTv2i64eq.Fragment)), 0xf)>;

multiclass CompareLogicalGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64:   CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask:   CodeFrag<(i32 (COPY_TO_REGCLASS
                                 (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                 (FSMv4i32 CLGEv2i64compare.Fragment), R32C))>;
}

defm I64LGE: CompareLogicalGreaterEqual64;

def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
          I64LGEv2i64.Fragment>;


// i64 setult (negation of setuge):
def : I64SETCCNegCond<setult, I64LGEr64>;
def : I64SELECTNegCond<setult, I64LGEr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setgt/setle:
//
// Signed compares differ from the unsigned ones only in the high word: the
// high words are compared signed, but once they tie the low words are plain
// magnitudes and must be compared *unsigned*.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Word-wise signed a > b on the scalar operands.
def CGTr64sgt:
    CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                       (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

// Word-wise a == b on the scalar operands.
def CGTr64eq:
    CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                       (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

// Signed 64-bit a > b:  a.hi == b.hi ? a.lo >u b.lo : a.hi >s b.hi.
// The low-word term uses the unsigned fragment; a signed low-word compare
// gets e.g. 0x00000000_80000000 > 0x00000000_00000001 wrong.
def CGTr64compare:
    CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
                        (XSWDv2i64 CLGTr64ugt.Fragment),
                        CGTr64eq.Fragment)>;

def CGTv2i64sgt:
    CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;

def CGTv2i64eq:
    CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

// Vector form; uses the vector fragments throughout (no R64C copies).
def CGTv2i64compare:
    CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
                        (XSWDv2i64 CLGTv2i64ugt.Fragment),
                        CGTv2i64eq.Fragment)>;

multiclass CompareGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64:   CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<CGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask:   CodeFrag<(i32 (COPY_TO_REGCLASS
                                 (FSMv4i32 CGTr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                 (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
}

// Instantiate the *signed* multiclass: setgt/setle must not share the
// unsigned (logical) sequences used by setugt/setule.
defm I64GT: CompareGreaterThan64;

def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
//                  I64GTv2i64.Fragment>;

// i64 setle (negation of setgt):
def : I64SETCCNegCond<setle, I64GTr64>;
def : I64SELECTNegCond<setle, I64GTr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setge/setlt:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Signed 64-bit a >= b:  a.hi == b.hi ? a.lo >=u b.lo : a.hi >s b.hi.
// Same structure as CGTr64compare, with "greater or equal" on the low word.
def CGEr64compare:
    CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
                        (XSWDv2i64 (ORv4i32 CLGTr64ugt.Fragment,
                                            CGTr64eq.Fragment)),
                        CGTr64eq.Fragment)>;

// NOTE(review): vector form left unchanged -- it is an "all four words are
// >= (signed)" test rather than a per-element 64-bit compare. Confirm.
def CGEv2i64compare:
    CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
                                          CGTv2i64eq.Fragment)), 0xf)>;

multiclass CompareGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64:   CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<CGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask:   CodeFrag<(i32 (COPY_TO_REGCLASS
                                 (FSMv4i32 CGEr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                                 (FSMv4i32 CGEv2i64compare.Fragment), R32C))>;
}

defm I64GE: CompareGreaterEqual64;

def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
          I64GEv2i64.Fragment>;

// i64 setlt (negation of setge):
def : I64SETCCNegCond<setlt, I64GEr64>;
def : I64SELECTNegCond<setlt, I64GEr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 add
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Word-wise carry generate for lhs + rhs.
class v2i64_add_cg<dag lhs, dag rhs>:
    CodeFrag<(CGv4i32 lhs, rhs)>;

// Add-extended of lhs and rhs, with the carries in cg rearranged through
// the shuffle mask cg_mask before being consumed.
class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
    CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;

// Full 64-bit add: generate the carries, then add-extended.
class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
    v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;

def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                                      (COPY_TO_REGCLASS R64C:$rB, VECREG),
                                      (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;

def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_add<(v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 subtraction
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Word-wise borrow generate.
class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;

// Subtract-from-extended, with the borrows in bg rearranged through the
// shuffle mask bg_mask before being consumed.
class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
    CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;

def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (COPY_TO_REGCLASS
              v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                        (COPY_TO_REGCLASS R64C:$rB, VECREG),
                        v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                                     (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
                        (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;

def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_sub<(v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    v2i64_sub_bg<(v2i64 VECREG:$rA),
                                 (v2i64 VECREG:$rB)>.Fragment,
                    (v4i32 VECREG:$rCGmask)>.Fragment>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 multiply
//
// Note: i64 multiply is simply the vector->scalar conversion of the
// full-on v2i64 multiply, since the entire vector has to be manipulated
// anyway.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Operand-splitting helpers: each replaces part of its operand with zero
// (ILv4i32 0) under a byte mask built by FSMBI.
class v2i64_mul_ahi64<dag rA> :
    CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_bhi64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

// The template parameter is named rB, but every use in this file passes
// the A operand.
class v2i64_mul_alo64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

class v2i64_mul_blo64<dag rB> :
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

// Quadword left shifts by 2 and 4 bytes of each operand.
class v2i64_mul_ashlq2<dag rA>:
    CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;

class v2i64_mul_ashlq4<dag rA>:
    CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;

class v2i64_mul_bshlq2<dag rB> :
    CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;

class v2i64_mul_bshlq4<dag rB> :
    CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;

// Sum of the partial products that contribute to the high part of the
// result.
class v2i64_highprod<dag rA, dag rB>:
    CodeFrag<(Av4i32
               (Av4i32
                 (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment,     // a1 x b3
                            v2i64_mul_ahi64<rA>.Fragment),
                 (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment,      // a0 x b3
                            v2i64_mul_bshlq4<rB>.Fragment)),
               (Av4i32
                 (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
                            v2i64_mul_ashlq4<rA>.Fragment),
                 (Av4i32
                   (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                              v2i64_mul_bhi64<rB>.Fragment),
                   (Av4i32
                     (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                                v2i64_mul_bhi64<rB>.Fragment),
                     (Av4i32
                       (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                  v2i64_mul_bshlq2<rB>.Fragment),
                       (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                  v2i64_mul_bshlq2<rB>.Fragment))))))>;

class v2i64_mul_a3_b3<dag rA, dag rB>:
    CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
                        v2i64_mul_blo64<rB>.Fragment)>;

class v2i64_mul_a2_b3<dag rA, dag rB>:
    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                          (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
                                       v2i64_mul_bshlq2<rB>.Fragment), 0x2),
                        (ILv4i32 0),
                        (FSMBIv4i32 0xc3c3))>;

class v2i64_mul_a3_b2<dag rA, dag rB>:
    CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                          (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
                                       v2i64_mul_ashlq2<rA>.Fragment), 0x2),
                        (ILv4i32 0),
                        (FSMBIv4i32 0xc3c3))>;

// Low partial products, accumulated with the 64-bit add sequence above.
class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
    v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
                        v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
              v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;

// Full multiply: low sum plus the masked high product.
class v2i64_mul<dag rA, dag rB, dag rCGmask>:
    v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
              (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
                         (ILv4i32 0),
                         (FSMBIv4i32 0x0f0f)),
              rCGmask>;

def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                                      (COPY_TO_REGCLASS R64C:$rB, VECREG),
                                      (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;

def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f64 comparisons
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// selb
// instruction definition for f64. Unlike SELBr64_cond, the condition operand
// here is a 32-bit scalar register (R32C), and the instruction carries its
// own select pattern: $rB is produced when $rC holds, $rA otherwise.
def SELBf64_cond:
    SELBInst<(outs R64FP:$rT),
             (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
             [(set R64FP:$rT,
                   (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;