1 // Derived from Inferno utils/8c/txt.c 2 // http://code.google.com/p/inferno-os/source/browse/utils/8c/txt.c 3 // 4 // Copyright 1994-1999 Lucent Technologies Inc. All rights reserved. 5 // Portions Copyright 1995-1997 C H Forsyth (forsyth (a] terzarima.net) 6 // Portions Copyright 1997-1999 Vita Nuova Limited 7 // Portions Copyright 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com) 8 // Portions Copyright 2004,2006 Bruce Ellis 9 // Portions Copyright 2005-2007 C H Forsyth (forsyth (a] terzarima.net) 10 // Revisions Copyright 2000-2007 Lucent Technologies Inc. and others 11 // Portions Copyright 2009 The Go Authors. All rights reserved. 12 // 13 // Permission is hereby granted, free of charge, to any person obtaining a copy 14 // of this software and associated documentation files (the "Software"), to deal 15 // in the Software without restriction, including without limitation the rights 16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 17 // copies of the Software, and to permit persons to whom the Software is 18 // furnished to do so, subject to the following conditions: 19 // 20 // The above copyright notice and this permission notice shall be included in 21 // all copies or substantial portions of the Software. 22 // 23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 29 // THE SOFTWARE. 
package x86

import (
	"cmd/compile/internal/big"
	"cmd/compile/internal/gc"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
	"fmt"
)

// TODO(rsc): Can make this bigger if we move
// the text segment up higher in 8l for all GOOS.
// At the same time, can raise StackBig in ../../runtime/stack.h.
var unmappedzero uint32 = 4096

// foptoas flags (387 floating-point instruction selection).
const (
	Frev  = 1 << 0 // reversed-operand form (e.g. FSUBR)
	Fpop  = 1 << 1 // form that pops the x87 stack once
	Fpop2 = 1 << 2 // form that pops the x87 stack twice
)

/*
 * return Axxx for Oxxx on type t.
 */
// The switch key packs the operator into the high 16 bits and the
// simplified machine type (gc.Simtype) into the low 16 bits, so each
// (op, type) pair selects exactly one assembler opcode.
func optoas(op int, t *gc.Type) int {
	if t == nil {
		gc.Fatal("optoas: t is nil")
	}

	a := obj.AXXX
	switch uint32(op)<<16 | uint32(gc.Simtype[t.Etype]) {
	default:
		gc.Fatal("optoas: no entry %v-%v", gc.Oconv(int(op), 0), t)

	case gc.OADDR<<16 | gc.TPTR32:
		a = x86.ALEAL

	case gc.OEQ<<16 | gc.TBOOL,
		gc.OEQ<<16 | gc.TINT8,
		gc.OEQ<<16 | gc.TUINT8,
		gc.OEQ<<16 | gc.TINT16,
		gc.OEQ<<16 | gc.TUINT16,
		gc.OEQ<<16 | gc.TINT32,
		gc.OEQ<<16 | gc.TUINT32,
		gc.OEQ<<16 | gc.TINT64,
		gc.OEQ<<16 | gc.TUINT64,
		gc.OEQ<<16 | gc.TPTR32,
		gc.OEQ<<16 | gc.TPTR64,
		gc.OEQ<<16 | gc.TFLOAT32,
		gc.OEQ<<16 | gc.TFLOAT64:
		a = x86.AJEQ

	case gc.ONE<<16 | gc.TBOOL,
		gc.ONE<<16 | gc.TINT8,
		gc.ONE<<16 | gc.TUINT8,
		gc.ONE<<16 | gc.TINT16,
		gc.ONE<<16 | gc.TUINT16,
		gc.ONE<<16 | gc.TINT32,
		gc.ONE<<16 | gc.TUINT32,
		gc.ONE<<16 | gc.TINT64,
		gc.ONE<<16 | gc.TUINT64,
		gc.ONE<<16 | gc.TPTR32,
		gc.ONE<<16 | gc.TPTR64,
		gc.ONE<<16 | gc.TFLOAT32,
		gc.ONE<<16 | gc.TFLOAT64:
		a = x86.AJNE

	case gc.OLT<<16 | gc.TINT8,
		gc.OLT<<16 | gc.TINT16,
		gc.OLT<<16 | gc.TINT32,
		gc.OLT<<16 | gc.TINT64:
		a = x86.AJLT

	case gc.OLT<<16 | gc.TUINT8,
		gc.OLT<<16 | gc.TUINT16,
		gc.OLT<<16 | gc.TUINT32,
		gc.OLT<<16 | gc.TUINT64:
		a = x86.AJCS

	case gc.OLE<<16 | gc.TINT8,
		gc.OLE<<16 | gc.TINT16,
		gc.OLE<<16 | gc.TINT32,
		gc.OLE<<16 | gc.TINT64:
		a = x86.AJLE

	case gc.OLE<<16 | gc.TUINT8,
		gc.OLE<<16 | gc.TUINT16,
		gc.OLE<<16 | gc.TUINT32,
		gc.OLE<<16 | gc.TUINT64:
		a = x86.AJLS

	case gc.OGT<<16 | gc.TINT8,
		gc.OGT<<16 | gc.TINT16,
		gc.OGT<<16 | gc.TINT32,
		gc.OGT<<16 | gc.TINT64:
		a = x86.AJGT

	// Note: float OLT/OLE are deliberately grouped with the unsigned
	// jump opcodes here (AJHI/AJCC).
	case gc.OGT<<16 | gc.TUINT8,
		gc.OGT<<16 | gc.TUINT16,
		gc.OGT<<16 | gc.TUINT32,
		gc.OGT<<16 | gc.TUINT64,
		gc.OLT<<16 | gc.TFLOAT32,
		gc.OLT<<16 | gc.TFLOAT64:
		a = x86.AJHI

	case gc.OGE<<16 | gc.TINT8,
		gc.OGE<<16 | gc.TINT16,
		gc.OGE<<16 | gc.TINT32,
		gc.OGE<<16 | gc.TINT64:
		a = x86.AJGE

	case gc.OGE<<16 | gc.TUINT8,
		gc.OGE<<16 | gc.TUINT16,
		gc.OGE<<16 | gc.TUINT32,
		gc.OGE<<16 | gc.TUINT64,
		gc.OLE<<16 | gc.TFLOAT32,
		gc.OLE<<16 | gc.TFLOAT64:
		a = x86.AJCC

	case gc.OCMP<<16 | gc.TBOOL,
		gc.OCMP<<16 | gc.TINT8,
		gc.OCMP<<16 | gc.TUINT8:
		a = x86.ACMPB

	case gc.OCMP<<16 | gc.TINT16,
		gc.OCMP<<16 | gc.TUINT16:
		a = x86.ACMPW

	case gc.OCMP<<16 | gc.TINT32,
		gc.OCMP<<16 | gc.TUINT32,
		gc.OCMP<<16 | gc.TPTR32:
		a = x86.ACMPL

	case gc.OAS<<16 | gc.TBOOL,
		gc.OAS<<16 | gc.TINT8,
		gc.OAS<<16 | gc.TUINT8:
		a = x86.AMOVB

	case gc.OAS<<16 | gc.TINT16,
		gc.OAS<<16 | gc.TUINT16:
		a = x86.AMOVW

	case gc.OAS<<16 | gc.TINT32,
		gc.OAS<<16 | gc.TUINT32,
		gc.OAS<<16 | gc.TPTR32:
		a = x86.AMOVL

	case gc.OAS<<16 | gc.TFLOAT32:
		a = x86.AMOVSS

	case gc.OAS<<16 | gc.TFLOAT64:
		a = x86.AMOVSD

	case gc.OADD<<16 | gc.TINT8,
		gc.OADD<<16 | gc.TUINT8:
		a = x86.AADDB

	case gc.OADD<<16 | gc.TINT16,
		gc.OADD<<16 | gc.TUINT16:
		a = x86.AADDW

	case gc.OADD<<16 | gc.TINT32,
		gc.OADD<<16 | gc.TUINT32,
		gc.OADD<<16 | gc.TPTR32:
		a = x86.AADDL

	case gc.OSUB<<16 | gc.TINT8,
		gc.OSUB<<16 | gc.TUINT8:
		a = x86.ASUBB

	case gc.OSUB<<16 | gc.TINT16,
		gc.OSUB<<16 | gc.TUINT16:
		a = x86.ASUBW

	case gc.OSUB<<16 | gc.TINT32,
		gc.OSUB<<16 | gc.TUINT32,
		gc.OSUB<<16 | gc.TPTR32:
		a = x86.ASUBL

	case gc.OINC<<16 | gc.TINT8,
		gc.OINC<<16 | gc.TUINT8:
		a = x86.AINCB

	case gc.OINC<<16 | gc.TINT16,
		gc.OINC<<16 | gc.TUINT16:
		a = x86.AINCW

	case gc.OINC<<16 | gc.TINT32,
		gc.OINC<<16 | gc.TUINT32,
		gc.OINC<<16 | gc.TPTR32:
		a = x86.AINCL

	case gc.ODEC<<16 | gc.TINT8,
		gc.ODEC<<16 | gc.TUINT8:
		a = x86.ADECB

	case gc.ODEC<<16 | gc.TINT16,
		gc.ODEC<<16 | gc.TUINT16:
		a = x86.ADECW

	case gc.ODEC<<16 | gc.TINT32,
		gc.ODEC<<16 | gc.TUINT32,
		gc.ODEC<<16 | gc.TPTR32:
		a = x86.ADECL

	case gc.OCOM<<16 | gc.TINT8,
		gc.OCOM<<16 | gc.TUINT8:
		a = x86.ANOTB

	case gc.OCOM<<16 | gc.TINT16,
		gc.OCOM<<16 | gc.TUINT16:
		a = x86.ANOTW

	case gc.OCOM<<16 | gc.TINT32,
		gc.OCOM<<16 | gc.TUINT32,
		gc.OCOM<<16 | gc.TPTR32:
		a = x86.ANOTL

	case gc.OMINUS<<16 | gc.TINT8,
		gc.OMINUS<<16 | gc.TUINT8:
		a = x86.ANEGB

	case gc.OMINUS<<16 | gc.TINT16,
		gc.OMINUS<<16 | gc.TUINT16:
		a = x86.ANEGW

	case gc.OMINUS<<16 | gc.TINT32,
		gc.OMINUS<<16 | gc.TUINT32,
		gc.OMINUS<<16 | gc.TPTR32:
		a = x86.ANEGL

	case gc.OAND<<16 | gc.TINT8,
		gc.OAND<<16 | gc.TUINT8:
		a = x86.AANDB

	case gc.OAND<<16 | gc.TINT16,
		gc.OAND<<16 | gc.TUINT16:
		a = x86.AANDW

	case gc.OAND<<16 | gc.TINT32,
		gc.OAND<<16 | gc.TUINT32,
		gc.OAND<<16 | gc.TPTR32:
		a = x86.AANDL

	case gc.OOR<<16 | gc.TINT8,
		gc.OOR<<16 | gc.TUINT8:
		a = x86.AORB

	case gc.OOR<<16 | gc.TINT16,
		gc.OOR<<16 | gc.TUINT16:
		a = x86.AORW

	case gc.OOR<<16 | gc.TINT32,
		gc.OOR<<16 | gc.TUINT32,
		gc.OOR<<16 | gc.TPTR32:
		a = x86.AORL

	case gc.OXOR<<16 | gc.TINT8,
		gc.OXOR<<16 | gc.TUINT8:
		a = x86.AXORB

	case gc.OXOR<<16 | gc.TINT16,
		gc.OXOR<<16 | gc.TUINT16:
		a = x86.AXORW

	case gc.OXOR<<16 | gc.TINT32,
		gc.OXOR<<16 | gc.TUINT32,
		gc.OXOR<<16 | gc.TPTR32:
		a = x86.AXORL

	case gc.OLROT<<16 | gc.TINT8,
		gc.OLROT<<16 | gc.TUINT8:
		a = x86.AROLB

	case gc.OLROT<<16 | gc.TINT16,
		gc.OLROT<<16 | gc.TUINT16:
		a = x86.AROLW

	case gc.OLROT<<16 | gc.TINT32,
		gc.OLROT<<16 | gc.TUINT32,
		gc.OLROT<<16 | gc.TPTR32:
		a = x86.AROLL

	case gc.OLSH<<16 | gc.TINT8,
		gc.OLSH<<16 | gc.TUINT8:
		a = x86.ASHLB

	case gc.OLSH<<16 | gc.TINT16,
		gc.OLSH<<16 | gc.TUINT16:
		a = x86.ASHLW

	case gc.OLSH<<16 | gc.TINT32,
		gc.OLSH<<16 | gc.TUINT32,
		gc.OLSH<<16 | gc.TPTR32:
		a = x86.ASHLL

	// Right shift: logical (SHR) for unsigned, arithmetic (SAR) for signed.
	case gc.ORSH<<16 | gc.TUINT8:
		a = x86.ASHRB

	case gc.ORSH<<16 | gc.TUINT16:
		a = x86.ASHRW

	case gc.ORSH<<16 | gc.TUINT32,
		gc.ORSH<<16 | gc.TPTR32:
		a = x86.ASHRL

	case gc.ORSH<<16 | gc.TINT8:
		a = x86.ASARB

	case gc.ORSH<<16 | gc.TINT16:
		a = x86.ASARW

	case gc.ORSH<<16 | gc.TINT32:
		a = x86.ASARL

	case gc.OHMUL<<16 | gc.TINT8,
		gc.OMUL<<16 | gc.TINT8,
		gc.OMUL<<16 | gc.TUINT8:
		a = x86.AIMULB

	case gc.OHMUL<<16 | gc.TINT16,
		gc.OMUL<<16 | gc.TINT16,
		gc.OMUL<<16 | gc.TUINT16:
		a = x86.AIMULW

	case gc.OHMUL<<16 | gc.TINT32,
		gc.OMUL<<16 | gc.TINT32,
		gc.OMUL<<16 | gc.TUINT32,
		gc.OMUL<<16 | gc.TPTR32:
		a = x86.AIMULL

	case gc.OHMUL<<16 | gc.TUINT8:
		a = x86.AMULB

	case gc.OHMUL<<16 | gc.TUINT16:
		a = x86.AMULW

	case gc.OHMUL<<16 | gc.TUINT32,
		gc.OHMUL<<16 | gc.TPTR32:
		a = x86.AMULL

	case gc.ODIV<<16 | gc.TINT8,
		gc.OMOD<<16 | gc.TINT8:
		a = x86.AIDIVB

	case gc.ODIV<<16 | gc.TUINT8,
		gc.OMOD<<16 | gc.TUINT8:
		a = x86.ADIVB

	case gc.ODIV<<16 | gc.TINT16,
		gc.OMOD<<16 | gc.TINT16:
		a = x86.AIDIVW

	case gc.ODIV<<16 | gc.TUINT16,
		gc.OMOD<<16 | gc.TUINT16:
		a = x86.ADIVW

	case gc.ODIV<<16 | gc.TINT32,
		gc.OMOD<<16 | gc.TINT32:
		a = x86.AIDIVL

	case gc.ODIV<<16 | gc.TUINT32,
		gc.ODIV<<16 | gc.TPTR32,
		gc.OMOD<<16 | gc.TUINT32,
		gc.OMOD<<16 | gc.TPTR32:
		a = x86.ADIVL

	case gc.OEXTEND<<16 | gc.TINT16:
		a = x86.ACWD

	case gc.OEXTEND<<16 | gc.TINT32:
		a = x86.ACDQ
	}

	return a
}

// foptoas returns the floating-point assembler opcode for op on type t.
// When the SSE path is in use (!Use387) flg is ignored; on the 387 path
// flg is a bitmask of Frev/Fpop/Fpop2 selecting the instruction form.
func foptoas(op int, t *gc.Type, flg int) int {
	a := obj.AXXX
	et := int(gc.Simtype[t.Etype])

	if !gc.Thearch.Use387 {
		// SSE instruction selection: one opcode per (op, float width).
		switch uint32(op)<<16 | uint32(et) {
		default:
			gc.Fatal("foptoas-sse: no entry %v-%v", gc.Oconv(int(op), 0), t)

		case gc.OCMP<<16 | gc.TFLOAT32:
			a = x86.AUCOMISS

		case gc.OCMP<<16 | gc.TFLOAT64:
			a = x86.AUCOMISD

		case gc.OAS<<16 | gc.TFLOAT32:
			a = x86.AMOVSS

		case gc.OAS<<16 | gc.TFLOAT64:
			a = x86.AMOVSD

		case gc.OADD<<16 | gc.TFLOAT32:
			a = x86.AADDSS

		case gc.OADD<<16 | gc.TFLOAT64:
			a = x86.AADDSD

		case gc.OSUB<<16 | gc.TFLOAT32:
			a = x86.ASUBSS

		case gc.OSUB<<16 | gc.TFLOAT64:
			a = x86.ASUBSD

		case gc.OMUL<<16 | gc.TFLOAT32:
			a = x86.AMULSS

		case gc.OMUL<<16 | gc.TFLOAT64:
			a = x86.AMULSD

		case gc.ODIV<<16 | gc.TFLOAT32:
			a = x86.ADIVSS

		case gc.ODIV<<16 | gc.TFLOAT64:
			a = x86.ADIVSD
		}

		return a
	}

	// If we need Fpop, it means we're working on
	// two different floating-point registers, not memory.
	// There the instruction only has a float64 form.
	if flg&Fpop != 0 {
		et = gc.TFLOAT64
	}

	// clear Frev if unneeded
	switch op {
	case gc.OADD,
		gc.OMUL:
		flg &^= Frev
	}

	// 387 instruction selection: the key packs op (high 16 bits),
	// float width (next 8 bits) and the Frev/Fpop/Fpop2 flags (low 8 bits).
	switch uint32(op)<<16 | (uint32(et)<<8 | uint32(flg)) {
	case gc.OADD<<16 | (gc.TFLOAT32<<8 | 0):
		return x86.AFADDF

	case gc.OADD<<16 | (gc.TFLOAT64<<8 | 0):
		return x86.AFADDD

	case gc.OADD<<16 | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFADDDP

	case gc.OSUB<<16 | (gc.TFLOAT32<<8 | 0):
		return x86.AFSUBF

	case gc.OSUB<<16 | (gc.TFLOAT32<<8 | Frev):
		return x86.AFSUBRF

	case gc.OSUB<<16 | (gc.TFLOAT64<<8 | 0):
		return x86.AFSUBD

	case gc.OSUB<<16 | (gc.TFLOAT64<<8 | Frev):
		return x86.AFSUBRD

	case gc.OSUB<<16 | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFSUBDP

	case gc.OSUB<<16 | (gc.TFLOAT64<<8 | (Fpop | Frev)):
		return x86.AFSUBRDP

	case gc.OMUL<<16 | (gc.TFLOAT32<<8 | 0):
		return x86.AFMULF

	case gc.OMUL<<16 | (gc.TFLOAT64<<8 | 0):
		return x86.AFMULD

	case gc.OMUL<<16 | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFMULDP

	case gc.ODIV<<16 | (gc.TFLOAT32<<8 | 0):
		return x86.AFDIVF

	case gc.ODIV<<16 | (gc.TFLOAT32<<8 | Frev):
		return x86.AFDIVRF

	case gc.ODIV<<16 | (gc.TFLOAT64<<8 | 0):
		return x86.AFDIVD

	case gc.ODIV<<16 | (gc.TFLOAT64<<8 | Frev):
		return x86.AFDIVRD

	case gc.ODIV<<16 | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFDIVDP

	case gc.ODIV<<16 | (gc.TFLOAT64<<8 | (Fpop | Frev)):
		return x86.AFDIVRDP

	case gc.OCMP<<16 | (gc.TFLOAT32<<8 | 0):
		return x86.AFCOMF

	case gc.OCMP<<16 | (gc.TFLOAT32<<8 | Fpop):
		return x86.AFCOMFP

	case gc.OCMP<<16 | (gc.TFLOAT64<<8 | 0):
		return x86.AFCOMD

	case gc.OCMP<<16 | (gc.TFLOAT64<<8 | Fpop):
		return x86.AFCOMDP

	case gc.OCMP<<16 | (gc.TFLOAT64<<8 | Fpop2):
		return x86.AFCOMDPP

	case gc.OMINUS<<16 | (gc.TFLOAT32<<8 | 0):
		return x86.AFCHS

	case gc.OMINUS<<16 | (gc.TFLOAT64<<8 | 0):
		return x86.AFCHS
	}

	gc.Fatal("foptoas %v %v %#x", gc.Oconv(int(op), 0), t, flg)
	return 0
}

// Registers the code generator must not allocate freely.
var resvd = []int{
	// REG_DI, // for movstring
	// REG_SI, // for movstring

	x86.REG_AX, // for divide
	x86.REG_CX, // for shift
	x86.REG_DX, // for divide, context
	x86.REG_SP, // for stack
}

/*
 * generate
 *	as $c, reg
 */
func gconreg(as int, c int64, reg int) {
	var n1 gc.Node
	var n2 gc.Node

	gc.Nodconst(&n1, gc.Types[gc.TINT64], c)
	gc.Nodreg(&n2, gc.Types[gc.TINT64], reg)
	gins(as, &n1, &n2)
}

/*
 * generate
 *	as $c, n
 */
func ginscon(as int, c int64, n2 *gc.Node) {
	var n1 gc.Node
	gc.Nodconst(&n1, gc.Types[gc.TINT32], c)
	gins(as, &n1, n2)
}

// ginscmp generates a comparison of n1 and n2 for operator op on type t
// and returns the resulting conditional branch instruction (to be patched
// by the caller). likely is the branch-prediction hint passed to Gbranch.
// For integer/pointer compares, a constant (or address constant) left
// operand is swapped to the right with the operator reversed.
func ginscmp(op int, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog {
	if gc.Isint[t.Etype] || int(t.Etype) == gc.Tptr {
		if (n1.Op == gc.OLITERAL || n1.Op == gc.OADDR && n1.Left.Op == gc.ONAME) && n2.Op != gc.OLITERAL {
			// Reverse comparison to place constant (including address constant) last.
			op = gc.Brrev(op)
			n1, n2 = n2, n1
		}
	}

	// General case.
	var r1, r2, g1, g2 gc.Node

	// A stack-resident name or indirect-through-register operand can be
	// compared in place; anything else is evaluated into a register.
	if n1.Op == gc.ONAME && n1.Class&gc.PHEAP == 0 || n1.Op == gc.OINDREG {
		r1 = *n1
	} else {
		gc.Regalloc(&r1, t, n1)
		gc.Regalloc(&g1, n1.Type, &r1)
		gc.Cgen(n1, &g1)
		gmove(&g1, &r1)
	}
	if n2.Op == gc.OLITERAL && gc.Isint[t.Etype] || n2.Op == gc.OADDR && n2.Left.Op == gc.ONAME && n2.Left.Class == gc.PEXTERN {
		r2 = *n2
	} else {
		gc.Regalloc(&r2, t, n2)
		gc.Regalloc(&g2, n1.Type, &r2)
		gc.Cgen(n2, &g2)
		gmove(&g2, &r2)
	}
	gins(optoas(gc.OCMP, t), &r1, &r2)
	// Only free registers we actually allocated above.
	if r1.Op == gc.OREGISTER {
		gc.Regfree(&g1)
		gc.Regfree(&r1)
	}
	if r2.Op == gc.OREGISTER {
		gc.Regfree(&g2)
		gc.Regfree(&r2)
	}
	return gc.Gbranch(optoas(op, t), nil, likely)
}

/*
 * swap node contents
 */
func nswap(a *gc.Node, b *gc.Node) {
	t := *a
	*a = *b
	*b = t
}

/*
 * return constant i node.
 * overwritten by next call, but useful in calls to gins.
 */

var ncon_n gc.Node

func ncon(i uint32) *gc.Node {
	// Lazily initialize the shared node on first use.
	if ncon_n.Type == nil {
		gc.Nodconst(&ncon_n, gc.Types[gc.TUINT32], 0)
	}
	ncon_n.SetInt(int64(i))
	return &ncon_n
}

// Stack of nodes to clean up after split64; released by splitclean.
var sclean [10]gc.Node

var nsclean int

/*
 * n is a 64-bit value. fill in lo and hi to refer to its 32-bit halves.
 */
// Every call must be paired with a later splitclean().
func split64(n *gc.Node, lo *gc.Node, hi *gc.Node) {
	if !gc.Is64(n.Type) {
		gc.Fatal("split64 %v", n.Type)
	}

	if nsclean >= len(sclean) {
		gc.Fatal("split64 clean")
	}
	sclean[nsclean].Op = gc.OEMPTY
	nsclean++
	switch n.Op {
	default:
		switch n.Op {
		default:
			var n1 gc.Node
			if !dotaddable(n, &n1) {
				gc.Igen(n, &n1, nil)
				sclean[nsclean-1] = n1
			}

			n = &n1

		case gc.ONAME:
			if n.Class == gc.PPARAMREF {
				var n1 gc.Node
				gc.Cgen(n.Name.Heapaddr, &n1)
				sclean[nsclean-1] = n1
				n = &n1
			}

			// nothing
		case gc.OINDREG:
			break
		}

		// Both halves alias n; the high half sits 4 bytes further on
		// and keeps the sign (TINT32) only for signed 64-bit values.
		*lo = *n
		*hi = *n
		lo.Type = gc.Types[gc.TUINT32]
		if n.Type.Etype == gc.TINT64 {
			hi.Type = gc.Types[gc.TINT32]
		} else {
			hi.Type = gc.Types[gc.TUINT32]
		}
		hi.Xoffset += 4

	case gc.OLITERAL:
		var n1 gc.Node
		n.Convconst(&n1, n.Type)
		i := n1.Int()
		gc.Nodconst(lo, gc.Types[gc.TUINT32], int64(uint32(i)))
		i >>= 32
		if n.Type.Etype == gc.TINT64 {
			gc.Nodconst(hi, gc.Types[gc.TINT32], int64(int32(i)))
		} else {
			gc.Nodconst(hi, gc.Types[gc.TUINT32], int64(uint32(i)))
		}
	}
}

// splitclean undoes the most recent split64, freeing any register or
// temporary it recorded in sclean.
func splitclean() {
	if nsclean <= 0 {
		gc.Fatal("splitclean")
	}
	nsclean--
	if sclean[nsclean].Op != gc.OEMPTY {
		gc.Regfree(&sclean[nsclean])
	}
}

// set up nodes representing fp constants
var (
	zerof        gc.Node
	two63f       gc.Node
	two64f       gc.Node
	bignodes_did bool
)

// bignodes initializes zerof (0.0), two63f (2^63) and two64f (2^64) as
// float64 constant nodes. Idempotent: later calls are no-ops.
func bignodes() {
	if bignodes_did {
		return
	}
	bignodes_did = true

	gc.Nodconst(&zerof, gc.Types[gc.TINT64], 0)
	zerof.Convconst(&zerof, gc.Types[gc.TFLOAT64])

	var i big.Int
	i.SetInt64(1)
	i.Lsh(&i, 63)
	var bigi gc.Node

	gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
	bigi.SetBigInt(&i)
	bigi.Convconst(&two63f, gc.Types[gc.TFLOAT64])

	gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
	i.Lsh(&i, 1)
	bigi.SetBigInt(&i)
	bigi.Convconst(&two64f, gc.Types[gc.TFLOAT64])
}

// memname allocates a stack temporary of type t whose name is rewritten
// so the optimizer will not move it into a register.
func memname(n *gc.Node, t *gc.Type) {
	gc.Tempname(n, t)
	n.Sym = gc.Lookup("." + n.Sym.Name[1:]) // keep optimizer from registerizing
	n.Orig.Sym = n.Sym
}

// gmove generates code to move the value of node f into node t,
// converting between (simplified) types as needed. Complex and
// floating-point moves are delegated to Complexmove and floatmove.
func gmove(f *gc.Node, t *gc.Node) {
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", f, t)
	}

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	if gc.Isfloat[ft] || gc.Isfloat[tt] {
		floatmove(f, t)
		return
	}

	// cannot have two integer memory operands;
	// except 64-bit, which always copies via registers anyway.
	var r1 gc.Node
	var a int
	if gc.Isint[ft] && gc.Isint[tt] && !gc.Is64(f.Type) && !gc.Is64(t.Type) && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = gc.Simsimtype(con.Type)
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		// should not happen
		gc.Fatal("gmove %v -> %v", f, t)
		return

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TINT8<<16 | gc.TUINT8,
		gc.TUINT8<<16 | gc.TINT8,
		gc.TUINT8<<16 | gc.TUINT8:
		a = x86.AMOVB

	case gc.TINT16<<16 | gc.TINT8, // truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TUINT8,
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8:
		a = x86.AMOVB

		goto rsrc

	case gc.TINT64<<16 | gc.TINT8, // truncate low word
		gc.TUINT64<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVB, &r1, t)
		splitclean()
		return

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TINT16<<16 | gc.TUINT16,
		gc.TUINT16<<16 | gc.TINT16,
		gc.TUINT16<<16 | gc.TUINT16:
		a = x86.AMOVW

	case gc.TINT32<<16 | gc.TINT16, // truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TUINT16,
		gc.TUINT32<<16 | gc.TUINT16:
		a = x86.AMOVW

		goto rsrc

	case gc.TINT64<<16 | gc.TINT16, // truncate low word
		gc.TUINT64<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		var flo gc.Node
		var fhi gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVW, &r1, t)
		splitclean()
		return

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TINT32<<16 | gc.TUINT32,
		gc.TUINT32<<16 | gc.TINT32,
		gc.TUINT32<<16 | gc.TUINT32:
		a = x86.AMOVL

	case gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var r1 gc.Node
		gc.Nodreg(&r1, t.Type, x86.REG_AX)
		gmove(&flo, &r1)
		gins(x86.AMOVL, &r1, t)
		splitclean()
		return

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		var fhi gc.Node
		var flo gc.Node
		split64(f, &flo, &fhi)

		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)
		if f.Op == gc.OLITERAL {
			gins(x86.AMOVL, &flo, &tlo)
			gins(x86.AMOVL, &fhi, &thi)
		} else {
			// Implementation of conversion-free x = y for int64 or uint64 x.
			// This is generated by the code that copies small values out of closures,
			// and that code has DX live, so avoid DX and use CX instead.
			var r1 gc.Node
			gc.Nodreg(&r1, gc.Types[gc.TUINT32], x86.REG_AX)
			var r2 gc.Node
			gc.Nodreg(&r2, gc.Types[gc.TUINT32], x86.REG_CX)
			gins(x86.AMOVL, &flo, &r1)
			gins(x86.AMOVL, &fhi, &r2)
			gins(x86.AMOVL, &r1, &tlo)
			gins(x86.AMOVL, &r2, &thi)
		}

		splitclean()
		splitclean()
		return

	/*
	 * integer up-conversions
	 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWSX

		goto rdst

	case gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLSX
		goto rdst

	case gc.TINT8<<16 | gc.TINT64, // convert via int32
		gc.TINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWZX

		goto rdst

	case gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLZX
		goto rdst

	case gc.TUINT8<<16 | gc.TINT64, // convert via uint32
		gc.TUINT8<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLSX

		goto rdst

	case gc.TINT16<<16 | gc.TINT64, // convert via int32
		gc.TINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLZX

		goto rdst

	case gc.TUINT16<<16 | gc.TINT64, // convert via uint32
		gc.TUINT16<<16 | gc.TUINT64:
		cvt = gc.Types[gc.TUINT32]

		goto hard

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)

		// CDQ sign-extends AX into DX, producing the 64-bit pair.
		var flo gc.Node
		gc.Nodreg(&flo, tlo.Type, x86.REG_AX)
		var fhi gc.Node
		gc.Nodreg(&fhi, thi.Type, x86.REG_DX)
		gmove(f, &flo)
		gins(x86.ACDQ, nil, nil)
		gins(x86.AMOVL, &flo, &tlo)
		gins(x86.AMOVL, &fhi, &thi)
		splitclean()
		return

	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		var tlo gc.Node
		var thi gc.Node
		split64(t, &tlo, &thi)

		gmove(f, &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		return
	}

	gins(a, f, t)
	return

	// requires register source
rsrc:
	gc.Regalloc(&r1, f.Type, t)

	gmove(f, &r1)
	gins(a, &r1, t)
	gc.Regfree(&r1)
	return

	// requires register destination
rdst:
	{
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}

// floatmove generates a move between f and t when at least one side is a
// floating-point type. Cases not handled here are dispatched to
// floatmove_387 or floatmove_sse depending on Thearch.Use387.
func floatmove(f *gc.Node, t *gc.Node) {
	var r1 gc.Node

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	cvt := t.Type

	// cannot have two floating point memory operands.
	if gc.Isfloat[ft] && gc.Isfloat[tt] && gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = gc.Simsimtype(con.Type)

		// some constants can't move directly to memory.
		if gc.Ismem(t) {
			// float constants come from memory.
			if gc.Isfloat[tt] {
				goto hard
			}
		}
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	switch uint32(ft)<<16 | uint32(tt) {
	default:
		if gc.Thearch.Use387 {
			floatmove_387(f, t)
		} else {
			floatmove_sse(f, t)
		}
		return

		// float to very long integer.
	case gc.TFLOAT32<<16 | gc.TINT64,
		gc.TFLOAT64<<16 | gc.TINT64:
		if f.Op == gc.OREGISTER {
			cvt = f.Type
			goto hardmem
		}

		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0)
		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &r1)
		} else {
			gins(x86.AFMOVD, f, &r1)
		}

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)
		if tt == gc.TINT16 {
			gins(x86.AFMOVWP, &r1, t)
		} else if tt == gc.TINT32 {
			gins(x86.AFMOVLP, &r1, t)
		} else {
			gins(x86.AFMOVVP, &r1, t)
		}
		gins(x86.AFLDCW, &t1, nil)
		return

	case gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		if !gc.Ismem(f) {
			cvt = f.Type
			goto hardmem
		}

		bignodes()
		var f0 gc.Node
		gc.Nodreg(&f0, gc.Types[ft], x86.REG_F0)
		var f1 gc.Node
		gc.Nodreg(&f1, gc.Types[ft], x86.REG_F0+1)
		// NOTE(review): ax is initialized but not otherwise used in the
		// visible code of this case.
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT16], x86.REG_AX)

		if ft == gc.TFLOAT32 {
			gins(x86.AFMOVF, f, &f0)
		} else {
			gins(x86.AFMOVD, f, &f0)
		}

		// if 0 > v { answer = 0 }
		gins(x86.AFMOVD, &zerof, &f0)

		gins(x86.AFUCOMIP, &f0, &f1)
		p1 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)

		// if 1<<64 <= v { answer = 0 too }
		gins(x86.AFMOVD, &two64f, &f0)

		gins(x86.AFUCOMIP, &f0, &f1)
		p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[tt]), nil, 0)
		gc.Patch(p1, gc.Pc)
		gins(x86.AFMOVVP, &f0, t) // don't care about t, but will pop the stack
		var thi gc.Node
		var tlo gc.Node
		split64(t, &tlo, &thi)
		gins(x86.AMOVL, ncon(0), &tlo)
		gins(x86.AMOVL, ncon(0), &thi)
		splitclean()
		p1 = gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)

		// in range; algorithm is:
		//	if small enough, use native float64 -> int64 conversion.
		//	otherwise, subtract 2^63, convert, and add it back.

		// set round to zero mode during conversion
		var t1 gc.Node
		memname(&t1, gc.Types[gc.TUINT16])

		var t2 gc.Node
		memname(&t2, gc.Types[gc.TUINT16])
		gins(x86.AFSTCW, nil, &t1)
		gins(x86.AMOVW, ncon(0xf7f), &t2)
		gins(x86.AFLDCW, &t2, nil)

		// actual work
		gins(x86.AFMOVD, &two63f, &f0)

		gins(x86.AFUCOMIP, &f0, &f1)
		p2 = gc.Gbranch(optoas(gc.OLE, gc.Types[tt]), nil, 0)
		gins(x86.AFMOVVP, &f0, t)
		p3 := gc.Gbranch(obj.AJMP, nil, 0)
		gc.Patch(p2, gc.Pc)
		gins(x86.AFMOVD, &two63f, &f0)
		gins(x86.AFSUBDP, &f0, &f1)
		gins(x86.AFMOVVP, &f0, t)
		split64(t, &tlo, &thi)
		gins(x86.AXORL, ncon(0x80000000), &thi) // + 2^63
		gc.Patch(p3, gc.Pc)
		splitclean()

		// restore rounding mode
		gins(x86.AFLDCW, &t1, nil)

		gc.Patch(p1, gc.Pc)
		return

	/*
	 * integer to float
	 */
	case gc.TINT64<<16 | gc.TFLOAT32,
		gc.TINT64<<16 | gc.TFLOAT64:
		if t.Op == gc.OREGISTER {
			goto hardmem
		}
		var f0 gc.Node
		gc.Nodreg(&f0, t.Type, x86.REG_F0)
		gins(x86.AFMOVV, f, &f0)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &f0, t)
		} else {
			gins(x86.AFMOVDP, &f0, t)
		}
		return

		// algorithm is:
		//	if small enough, use native int64 -> float64 conversion.
		//	otherwise, halve (rounding to odd?), convert, and double.
	case gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		var ax gc.Node
		gc.Nodreg(&ax, gc.Types[gc.TUINT32], x86.REG_AX)

		var dx gc.Node
		gc.Nodreg(&dx, gc.Types[gc.TUINT32], x86.REG_DX)
		var cx gc.Node
		gc.Nodreg(&cx, gc.Types[gc.TUINT32], x86.REG_CX)
		var t1 gc.Node
		gc.Tempname(&t1, f.Type)
		var tlo gc.Node
		var thi gc.Node
		split64(&t1, &tlo, &thi)
		gmove(f, &t1)
		gins(x86.ACMPL, &thi, ncon(0))
		p1 := gc.Gbranch(x86.AJLT, nil, 0)

		// native
		var r1 gc.Node
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)

		gins(x86.AFMOVV, &t1, &r1)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		p2 := gc.Gbranch(obj.AJMP, nil, 0)

		// simulated
		gc.Patch(p1, gc.Pc)

		gmove(&tlo, &ax)
		gmove(&thi, &dx)
		p1 = gins(x86.ASHRL, ncon(1), &ax)
		p1.From.Index = x86.REG_DX // double-width shift DX -> AX
		p1.From.Scale = 0
		gins(x86.AMOVL, ncon(0), &cx)
		gins(x86.ASETCC, nil, &cx)
		gins(x86.AORL, &cx, &ax)
		gins(x86.ASHRL, ncon(1), &dx)
		gmove(&dx, &thi)
		gmove(&ax, &tlo)
		gc.Nodreg(&r1, gc.Types[tt], x86.REG_F0)
		var r2 gc.Node
		gc.Nodreg(&r2, gc.Types[tt], x86.REG_F0+1)
		gins(x86.AFMOVV, &t1, &r1)
		gins(x86.AFMOVD, &r1, &r1)
		gins(x86.AFADDDP, &r1, &r2)
		if tt == gc.TFLOAT32 {
			gins(x86.AFMOVFP, &r1, t)
		} else {
			gins(x86.AFMOVDP, &r1, t)
		}
		gc.Patch(p2, gc.Pc)
		splitclean()
		return
	}

	// requires register intermediate
hard:
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return

	// requires memory intermediate
hardmem:
	gc.Tempname(&r1, cvt)

	gmove(f, &r1)
	gmove(&r1, t)
	return
}

func floatmove_387(f *gc.Node, t *gc.Node) {
	var r1 gc.Node
	var a int

	ft :=
gc.Simsimtype(f.Type) 1314 tt := gc.Simsimtype(t.Type) 1315 cvt := t.Type 1316 1317 switch uint32(ft)<<16 | uint32(tt) { 1318 default: 1319 goto fatal 1320 1321 /* 1322 * float to integer 1323 */ 1324 case gc.TFLOAT32<<16 | gc.TINT16, 1325 gc.TFLOAT32<<16 | gc.TINT32, 1326 gc.TFLOAT32<<16 | gc.TINT64, 1327 gc.TFLOAT64<<16 | gc.TINT16, 1328 gc.TFLOAT64<<16 | gc.TINT32, 1329 gc.TFLOAT64<<16 | gc.TINT64: 1330 if t.Op == gc.OREGISTER { 1331 goto hardmem 1332 } 1333 var r1 gc.Node 1334 gc.Nodreg(&r1, gc.Types[ft], x86.REG_F0) 1335 if f.Op != gc.OREGISTER { 1336 if ft == gc.TFLOAT32 { 1337 gins(x86.AFMOVF, f, &r1) 1338 } else { 1339 gins(x86.AFMOVD, f, &r1) 1340 } 1341 } 1342 1343 // set round to zero mode during conversion 1344 var t1 gc.Node 1345 memname(&t1, gc.Types[gc.TUINT16]) 1346 1347 var t2 gc.Node 1348 memname(&t2, gc.Types[gc.TUINT16]) 1349 gins(x86.AFSTCW, nil, &t1) 1350 gins(x86.AMOVW, ncon(0xf7f), &t2) 1351 gins(x86.AFLDCW, &t2, nil) 1352 if tt == gc.TINT16 { 1353 gins(x86.AFMOVWP, &r1, t) 1354 } else if tt == gc.TINT32 { 1355 gins(x86.AFMOVLP, &r1, t) 1356 } else { 1357 gins(x86.AFMOVVP, &r1, t) 1358 } 1359 gins(x86.AFLDCW, &t1, nil) 1360 return 1361 1362 // convert via int32. 
1363 case gc.TFLOAT32<<16 | gc.TINT8, 1364 gc.TFLOAT32<<16 | gc.TUINT16, 1365 gc.TFLOAT32<<16 | gc.TUINT8, 1366 gc.TFLOAT64<<16 | gc.TINT8, 1367 gc.TFLOAT64<<16 | gc.TUINT16, 1368 gc.TFLOAT64<<16 | gc.TUINT8: 1369 var t1 gc.Node 1370 gc.Tempname(&t1, gc.Types[gc.TINT32]) 1371 1372 gmove(f, &t1) 1373 switch tt { 1374 default: 1375 gc.Fatal("gmove %v", t) 1376 1377 case gc.TINT8: 1378 gins(x86.ACMPL, &t1, ncon(-0x80&(1<<32-1))) 1379 p1 := gc.Gbranch(optoas(gc.OLT, gc.Types[gc.TINT32]), nil, -1) 1380 gins(x86.ACMPL, &t1, ncon(0x7f)) 1381 p2 := gc.Gbranch(optoas(gc.OGT, gc.Types[gc.TINT32]), nil, -1) 1382 p3 := gc.Gbranch(obj.AJMP, nil, 0) 1383 gc.Patch(p1, gc.Pc) 1384 gc.Patch(p2, gc.Pc) 1385 gmove(ncon(-0x80&(1<<32-1)), &t1) 1386 gc.Patch(p3, gc.Pc) 1387 gmove(&t1, t) 1388 1389 case gc.TUINT8: 1390 gins(x86.ATESTL, ncon(0xffffff00), &t1) 1391 p1 := gc.Gbranch(x86.AJEQ, nil, +1) 1392 gins(x86.AMOVL, ncon(0), &t1) 1393 gc.Patch(p1, gc.Pc) 1394 gmove(&t1, t) 1395 1396 case gc.TUINT16: 1397 gins(x86.ATESTL, ncon(0xffff0000), &t1) 1398 p1 := gc.Gbranch(x86.AJEQ, nil, +1) 1399 gins(x86.AMOVL, ncon(0), &t1) 1400 gc.Patch(p1, gc.Pc) 1401 gmove(&t1, t) 1402 } 1403 1404 return 1405 1406 // convert via int64. 
1407 case gc.TFLOAT32<<16 | gc.TUINT32, 1408 gc.TFLOAT64<<16 | gc.TUINT32: 1409 cvt = gc.Types[gc.TINT64] 1410 1411 goto hardmem 1412 1413 /* 1414 * integer to float 1415 */ 1416 case gc.TINT16<<16 | gc.TFLOAT32, 1417 gc.TINT16<<16 | gc.TFLOAT64, 1418 gc.TINT32<<16 | gc.TFLOAT32, 1419 gc.TINT32<<16 | gc.TFLOAT64, 1420 gc.TINT64<<16 | gc.TFLOAT32, 1421 gc.TINT64<<16 | gc.TFLOAT64: 1422 if t.Op != gc.OREGISTER { 1423 goto hard 1424 } 1425 if f.Op == gc.OREGISTER { 1426 cvt = f.Type 1427 goto hardmem 1428 } 1429 1430 switch ft { 1431 case gc.TINT16: 1432 a = x86.AFMOVW 1433 1434 case gc.TINT32: 1435 a = x86.AFMOVL 1436 1437 default: 1438 a = x86.AFMOVV 1439 } 1440 1441 // convert via int32 memory 1442 case gc.TINT8<<16 | gc.TFLOAT32, 1443 gc.TINT8<<16 | gc.TFLOAT64, 1444 gc.TUINT16<<16 | gc.TFLOAT32, 1445 gc.TUINT16<<16 | gc.TFLOAT64, 1446 gc.TUINT8<<16 | gc.TFLOAT32, 1447 gc.TUINT8<<16 | gc.TFLOAT64: 1448 cvt = gc.Types[gc.TINT32] 1449 1450 goto hardmem 1451 1452 // convert via int64 memory 1453 case gc.TUINT32<<16 | gc.TFLOAT32, 1454 gc.TUINT32<<16 | gc.TFLOAT64: 1455 cvt = gc.Types[gc.TINT64] 1456 1457 goto hardmem 1458 1459 // The way the code generator uses floating-point 1460 // registers, a move from F0 to F0 is intended as a no-op. 1461 // On the x86, it's not: it pushes a second copy of F0 1462 // on the floating point stack. So toss it away here. 1463 // Also, F0 is the *only* register we ever evaluate 1464 // into, so we should only see register/register as F0/F0. 
1465 /* 1466 * float to float 1467 */ 1468 case gc.TFLOAT32<<16 | gc.TFLOAT32, 1469 gc.TFLOAT64<<16 | gc.TFLOAT64: 1470 if gc.Ismem(f) && gc.Ismem(t) { 1471 goto hard 1472 } 1473 if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER { 1474 if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 { 1475 goto fatal 1476 } 1477 return 1478 } 1479 1480 a = x86.AFMOVF 1481 if ft == gc.TFLOAT64 { 1482 a = x86.AFMOVD 1483 } 1484 if gc.Ismem(t) { 1485 if f.Op != gc.OREGISTER || f.Reg != x86.REG_F0 { 1486 gc.Fatal("gmove %v", f) 1487 } 1488 a = x86.AFMOVFP 1489 if ft == gc.TFLOAT64 { 1490 a = x86.AFMOVDP 1491 } 1492 } 1493 1494 case gc.TFLOAT32<<16 | gc.TFLOAT64: 1495 if gc.Ismem(f) && gc.Ismem(t) { 1496 goto hard 1497 } 1498 if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER { 1499 if f.Reg != x86.REG_F0 || t.Reg != x86.REG_F0 { 1500 goto fatal 1501 } 1502 return 1503 } 1504 1505 if f.Op == gc.OREGISTER { 1506 gins(x86.AFMOVDP, f, t) 1507 } else { 1508 gins(x86.AFMOVF, f, t) 1509 } 1510 return 1511 1512 case gc.TFLOAT64<<16 | gc.TFLOAT32: 1513 if gc.Ismem(f) && gc.Ismem(t) { 1514 goto hard 1515 } 1516 if f.Op == gc.OREGISTER && t.Op == gc.OREGISTER { 1517 var r1 gc.Node 1518 gc.Tempname(&r1, gc.Types[gc.TFLOAT32]) 1519 gins(x86.AFMOVFP, f, &r1) 1520 gins(x86.AFMOVF, &r1, t) 1521 return 1522 } 1523 1524 if f.Op == gc.OREGISTER { 1525 gins(x86.AFMOVFP, f, t) 1526 } else { 1527 gins(x86.AFMOVD, f, t) 1528 } 1529 return 1530 } 1531 1532 gins(a, f, t) 1533 return 1534 1535 // requires register intermediate 1536 hard: 1537 gc.Regalloc(&r1, cvt, t) 1538 1539 gmove(f, &r1) 1540 gmove(&r1, t) 1541 gc.Regfree(&r1) 1542 return 1543 1544 // requires memory intermediate 1545 hardmem: 1546 gc.Tempname(&r1, cvt) 1547 1548 gmove(f, &r1) 1549 gmove(&r1, t) 1550 return 1551 1552 // should not happen 1553 fatal: 1554 gc.Fatal("gmove %v -> %v", gc.Nconv(f, obj.FmtLong), gc.Nconv(t, obj.FmtLong)) 1555 1556 return 1557 } 1558 1559 func floatmove_sse(f *gc.Node, t *gc.Node) { 1560 var r1 gc.Node 1561 var cvt 
*gc.Type 1562 var a int 1563 1564 ft := gc.Simsimtype(f.Type) 1565 tt := gc.Simsimtype(t.Type) 1566 1567 switch uint32(ft)<<16 | uint32(tt) { 1568 // should not happen 1569 default: 1570 gc.Fatal("gmove %v -> %v", f, t) 1571 1572 return 1573 1574 // convert via int32. 1575 /* 1576 * float to integer 1577 */ 1578 case gc.TFLOAT32<<16 | gc.TINT16, 1579 gc.TFLOAT32<<16 | gc.TINT8, 1580 gc.TFLOAT32<<16 | gc.TUINT16, 1581 gc.TFLOAT32<<16 | gc.TUINT8, 1582 gc.TFLOAT64<<16 | gc.TINT16, 1583 gc.TFLOAT64<<16 | gc.TINT8, 1584 gc.TFLOAT64<<16 | gc.TUINT16, 1585 gc.TFLOAT64<<16 | gc.TUINT8: 1586 cvt = gc.Types[gc.TINT32] 1587 1588 goto hard 1589 1590 // convert via int64. 1591 case gc.TFLOAT32<<16 | gc.TUINT32, 1592 gc.TFLOAT64<<16 | gc.TUINT32: 1593 cvt = gc.Types[gc.TINT64] 1594 1595 goto hardmem 1596 1597 case gc.TFLOAT32<<16 | gc.TINT32: 1598 a = x86.ACVTTSS2SL 1599 goto rdst 1600 1601 case gc.TFLOAT64<<16 | gc.TINT32: 1602 a = x86.ACVTTSD2SL 1603 goto rdst 1604 1605 // convert via int32 memory 1606 /* 1607 * integer to float 1608 */ 1609 case gc.TINT8<<16 | gc.TFLOAT32, 1610 gc.TINT8<<16 | gc.TFLOAT64, 1611 gc.TINT16<<16 | gc.TFLOAT32, 1612 gc.TINT16<<16 | gc.TFLOAT64, 1613 gc.TUINT16<<16 | gc.TFLOAT32, 1614 gc.TUINT16<<16 | gc.TFLOAT64, 1615 gc.TUINT8<<16 | gc.TFLOAT32, 1616 gc.TUINT8<<16 | gc.TFLOAT64: 1617 cvt = gc.Types[gc.TINT32] 1618 1619 goto hard 1620 1621 // convert via int64 memory 1622 case gc.TUINT32<<16 | gc.TFLOAT32, 1623 gc.TUINT32<<16 | gc.TFLOAT64: 1624 cvt = gc.Types[gc.TINT64] 1625 1626 goto hardmem 1627 1628 case gc.TINT32<<16 | gc.TFLOAT32: 1629 a = x86.ACVTSL2SS 1630 goto rdst 1631 1632 case gc.TINT32<<16 | gc.TFLOAT64: 1633 a = x86.ACVTSL2SD 1634 goto rdst 1635 1636 /* 1637 * float to float 1638 */ 1639 case gc.TFLOAT32<<16 | gc.TFLOAT32: 1640 a = x86.AMOVSS 1641 1642 case gc.TFLOAT64<<16 | gc.TFLOAT64: 1643 a = x86.AMOVSD 1644 1645 case gc.TFLOAT32<<16 | gc.TFLOAT64: 1646 a = x86.ACVTSS2SD 1647 goto rdst 1648 1649 case gc.TFLOAT64<<16 | 
gc.TFLOAT32: 1650 a = x86.ACVTSD2SS 1651 goto rdst 1652 } 1653 1654 gins(a, f, t) 1655 return 1656 1657 // requires register intermediate 1658 hard: 1659 gc.Regalloc(&r1, cvt, t) 1660 1661 gmove(f, &r1) 1662 gmove(&r1, t) 1663 gc.Regfree(&r1) 1664 return 1665 1666 // requires memory intermediate 1667 hardmem: 1668 gc.Tempname(&r1, cvt) 1669 1670 gmove(f, &r1) 1671 gmove(&r1, t) 1672 return 1673 1674 // requires register destination 1675 rdst: 1676 gc.Regalloc(&r1, t.Type, t) 1677 1678 gins(a, f, &r1) 1679 gmove(&r1, t) 1680 gc.Regfree(&r1) 1681 return 1682 } 1683 1684 func samaddr(f *gc.Node, t *gc.Node) bool { 1685 if f.Op != t.Op { 1686 return false 1687 } 1688 1689 switch f.Op { 1690 case gc.OREGISTER: 1691 if f.Reg != t.Reg { 1692 break 1693 } 1694 return true 1695 } 1696 1697 return false 1698 } 1699 1700 /* 1701 * generate one instruction: 1702 * as f, t 1703 */ 1704 func gins(as int, f *gc.Node, t *gc.Node) *obj.Prog { 1705 if as == x86.AFMOVF && f != nil && f.Op == gc.OREGISTER && t != nil && t.Op == gc.OREGISTER { 1706 gc.Fatal("gins MOVF reg, reg") 1707 } 1708 if as == x86.ACVTSD2SS && f != nil && f.Op == gc.OLITERAL { 1709 gc.Fatal("gins CVTSD2SS const") 1710 } 1711 if as == x86.AMOVSD && t != nil && t.Op == gc.OREGISTER && t.Reg == x86.REG_F0 { 1712 gc.Fatal("gins MOVSD into F0") 1713 } 1714 1715 if as == x86.AMOVL && f != nil && f.Op == gc.OADDR && f.Left.Op == gc.ONAME && f.Left.Class != gc.PEXTERN && f.Left.Class != gc.PFUNC { 1716 // Turn MOVL $xxx(FP/SP) into LEAL xxx. 1717 // These should be equivalent but most of the backend 1718 // only expects to see LEAL, because that's what we had 1719 // historically generated. Various hidden assumptions are baked in by now. 
1720 as = x86.ALEAL 1721 f = f.Left 1722 } 1723 1724 switch as { 1725 case x86.AMOVB, 1726 x86.AMOVW, 1727 x86.AMOVL: 1728 if f != nil && t != nil && samaddr(f, t) { 1729 return nil 1730 } 1731 1732 case x86.ALEAL: 1733 if f != nil && gc.Isconst(f, gc.CTNIL) { 1734 gc.Fatal("gins LEAL nil %v", f.Type) 1735 } 1736 } 1737 1738 p := gc.Prog(as) 1739 gc.Naddr(&p.From, f) 1740 gc.Naddr(&p.To, t) 1741 1742 if gc.Debug['g'] != 0 { 1743 fmt.Printf("%v\n", p) 1744 } 1745 1746 w := 0 1747 switch as { 1748 case x86.AMOVB: 1749 w = 1 1750 1751 case x86.AMOVW: 1752 w = 2 1753 1754 case x86.AMOVL: 1755 w = 4 1756 } 1757 1758 if true && w != 0 && f != nil && (p.From.Width > int64(w) || p.To.Width > int64(w)) { 1759 gc.Dump("bad width from:", f) 1760 gc.Dump("bad width to:", t) 1761 gc.Fatal("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width) 1762 } 1763 1764 if p.To.Type == obj.TYPE_ADDR && w > 0 { 1765 gc.Fatal("bad use of addr: %v", p) 1766 } 1767 1768 return p 1769 } 1770 1771 func ginsnop() { 1772 var reg gc.Node 1773 gc.Nodreg(®, gc.Types[gc.TINT], x86.REG_AX) 1774 gins(x86.AXCHGL, ®, ®) 1775 } 1776 1777 func dotaddable(n *gc.Node, n1 *gc.Node) bool { 1778 if n.Op != gc.ODOT { 1779 return false 1780 } 1781 1782 var oary [10]int64 1783 var nn *gc.Node 1784 o := gc.Dotoffset(n, oary[:], &nn) 1785 if nn != nil && nn.Addable && o == 1 && oary[0] >= 0 { 1786 *n1 = *nn 1787 n1.Type = n.Type 1788 n1.Xoffset += oary[0] 1789 return true 1790 } 1791 1792 return false 1793 } 1794 1795 func sudoclean() { 1796 } 1797 1798 func sudoaddable(as int, n *gc.Node, a *obj.Addr) bool { 1799 *a = obj.Addr{} 1800 return false 1801 } 1802