Home | History | Annotate | Download | only in x86
      1 // Inferno utils/6l/span.c
      2 // http://code.google.com/p/inferno-os/source/browse/utils/6l/span.c
      3 //
      4 //	Copyright  1994-1999 Lucent Technologies Inc.  All rights reserved.
      5 //	Portions Copyright  1995-1997 C H Forsyth (forsyth (a] terzarima.net)
      6 //	Portions Copyright  1997-1999 Vita Nuova Limited
      7 //	Portions Copyright  2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
      8 //	Portions Copyright  2004,2006 Bruce Ellis
      9 //	Portions Copyright  2005-2007 C H Forsyth (forsyth (a] terzarima.net)
     10 //	Revisions Copyright  2000-2007 Lucent Technologies Inc. and others
     11 //	Portions Copyright  2009 The Go Authors.  All rights reserved.
     12 //
     13 // Permission is hereby granted, free of charge, to any person obtaining a copy
     14 // of this software and associated documentation files (the "Software"), to deal
     15 // in the Software without restriction, including without limitation the rights
     16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     17 // copies of the Software, and to permit persons to whom the Software is
     18 // furnished to do so, subject to the following conditions:
     19 //
     20 // The above copyright notice and this permission notice shall be included in
     21 // all copies or substantial portions of the Software.
     22 //
     23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
     26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     29 // THE SOFTWARE.
     30 
     31 package x86
     32 
     33 import (
     34 	"cmd/internal/obj"
     35 	"fmt"
     36 	"log"
     37 	"strings"
     38 )
     39 
     40 // Instruction layout.
     41 
     42 const (
     43 	MaxAlign = 32 // max data alignment
     44 
     45 	// Loop alignment constants:
     46 	// want to align loop entry to LoopAlign-byte boundary,
     47 	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
     48 	// We define a loop entry as the target of a backward jump.
     49 	//
     50 	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
     51 	// and it aligns all jump targets, not just backward jump targets.
     52 	//
     53 	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
     54 	// is very slight but negative, so the alignment is disabled by
     55 	// setting MaxLoopPad = 0. The code is here for reference and
     56 	// for future experiments.
     57 	//
     58 	LoopAlign  = 16
     59 	MaxLoopPad = 0
     60 	FuncAlign  = 16
     61 )
     62 
// Optab is one entry of the optab instruction table. The table is scanned
// for the entry whose as field equals the instruction being assembled; that
// entry's ytab is then searched for a line matching the operands.
type Optab struct {
	as     int16     // instruction this entry describes
	ytab   []ytab    // operand-form lines for the instruction
	prefix uint8     // one of the P* constants; controls the prefix bytes to emit
	op     [23]uint8 // byte list walked by the z pointer; each ytab line accounts for zoffset of these bytes
}
     69 
     70 type ytab struct {
     71 	from    uint8
     72 	from3   uint8
     73 	to      uint8
     74 	zcase   uint8
     75 	zoffset uint8
     76 }
     77 
     78 type Movtab struct {
     79 	as   int16
     80 	ft   uint8
     81 	f3t  uint8
     82 	tt   uint8
     83 	code uint8
     84 	op   [4]uint8
     85 }
     86 
// Operand classes ("Ytypes"). They fill the from, from3 and to columns of
// the ytab tables. Values are assigned by iota; Ymax, the final entry, is
// the number of classes and sizes the ycover table (Ymax * Ymax entries).
const (
	Yxxx = iota
	Ynone
	Yi0 // $0
	Yi1 // $1
	Yi8 // $x, x fits in int8
	Yu8 // $x, x fits in uint8
	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
	Ys32
	Yi32
	Yi64
	Yiauto
	Yal
	Ycl
	Yax
	Ycx
	Yrb
	Yrl
	Yrl32 // Yrl on 32-bit system
	Yrf
	Yf0
	Yrx
	Ymb
	Yml
	Ym
	Ybr
	Ycs
	Yss
	Yds
	Yes
	Yfs
	Ygs
	Ygdtr
	Yidtr
	Yldtr
	Ymsw
	Ytask
	Ycr0
	Ycr1
	Ycr2
	Ycr3
	Ycr4
	Ycr5
	Ycr6
	Ycr7
	Ycr8
	Ydr0
	Ydr1
	Ydr2
	Ydr3
	Ydr4
	Ydr5
	Ydr6
	Ydr7
	Ytr0
	Ytr1
	Ytr2
	Ytr3
	Ytr4
	Ytr5
	Ytr6
	Ytr7
	Ymr
	Ymm
	Yxr
	Yxm
	Ytls
	Ytextsize
	Yindir
	Ymax
)
    158 
// Encoding cases ("Ztypes"). One of these sits in the zcase column of every
// ytab line; together with the bytes at the z pointer it determines the
// bytes laid down for a matching instruction. Values are assigned by iota
// and Zmax is the final entry.
const (
	Zxxx = iota
	Zlit
	Zlitm_r
	Z_rp
	Zbr
	Zcall
	Zcallcon
	Zcallduff
	Zcallind
	Zcallindreg
	Zib_
	Zib_rp
	Zibo_m
	Zibo_m_xm
	Zil_
	Zil_rp
	Ziq_rp
	Zilo_m
	Ziqo_m
	Zjmp
	Zjmpcon
	Zloop
	Zo_iw
	Zm_o
	Zm_r
	Zm2_r
	Zm_r_xm
	Zm_r_i_xm
	Zm_r_3d
	Zm_r_xm_nr
	Zr_m_xm_nr
	Zibm_r /* mmx1,mmx2/mem64,imm8 */
	Zmb_r
	Zaut_r
	Zo_m
	Zo_m64
	Zpseudo
	Zr_m
	Zr_m_xm
	Zrp_
	Z_ib
	Z_il
	Zm_ibo
	Zm_ilo
	Zib_rr
	Zil_rr
	Zclr
	Zbyte
	Zmax
)
    210 
    211 const (
    212 	Px  = 0
    213 	Px1 = 1    // symbolic; exact value doesn't matter
    214 	P32 = 0x32 /* 32-bit only */
    215 	Pe  = 0x66 /* operand escape */
    216 	Pm  = 0x0f /* 2byte opcode escape */
    217 	Pq  = 0xff /* both escapes: 66 0f */
    218 	Pb  = 0xfe /* byte operands */
    219 	Pf2 = 0xf2 /* xmm escape 1: f2 0f */
    220 	Pf3 = 0xf3 /* xmm escape 2: f3 0f */
    221 	Pq3 = 0x67 /* xmm escape 3: 66 48 0f */
    222 	Pw  = 0x48 /* Rex.w */
    223 	Pw8 = 0x90 // symbolic; exact value doesn't matter
    224 	Py  = 0x80 /* defaults to 64-bit mode */
    225 	Py1 = 0x81 // symbolic; exact value doesn't matter
    226 	Py3 = 0x83 // symbolic; exact value doesn't matter
    227 
    228 	Rxf = 1 << 9 /* internal flag for Rxr on from */
    229 	Rxt = 1 << 8 /* internal flag for Rxr on to */
    230 	Rxw = 1 << 3 /* =1, 64-bit operand size */
    231 	Rxr = 1 << 2 /* extend modrm reg */
    232 	Rxx = 1 << 1 /* extend sib index */
    233 	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
    234 
    235 	Maxand = 10 /* in -a output width of the byte codes */
    236 )
    237 
    238 var ycover [Ymax * Ymax]uint8
    239 
    240 var reg [MAXREG]int
    241 
    242 var regrex [MAXREG + 1]int
    243 
    244 var ynone = []ytab{
    245 	{Ynone, Ynone, Ynone, Zlit, 1},
    246 }
    247 
    248 var ysahf = []ytab{
    249 	{Ynone, Ynone, Ynone, Zlit, 2},
    250 	{Ynone, Ynone, Ynone, Zlit, 1},
    251 }
    252 
    253 var ytext = []ytab{
    254 	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
    255 	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
    256 }
    257 
    258 var ynop = []ytab{
    259 	{Ynone, Ynone, Ynone, Zpseudo, 0},
    260 	{Ynone, Ynone, Yiauto, Zpseudo, 0},
    261 	{Ynone, Ynone, Yml, Zpseudo, 0},
    262 	{Ynone, Ynone, Yrf, Zpseudo, 0},
    263 	{Ynone, Ynone, Yxr, Zpseudo, 0},
    264 	{Yiauto, Ynone, Ynone, Zpseudo, 0},
    265 	{Yml, Ynone, Ynone, Zpseudo, 0},
    266 	{Yrf, Ynone, Ynone, Zpseudo, 0},
    267 	{Yxr, Ynone, Ynone, Zpseudo, 1},
    268 }
    269 
    270 var yfuncdata = []ytab{
    271 	{Yi32, Ynone, Ym, Zpseudo, 0},
    272 }
    273 
    274 var ypcdata = []ytab{
    275 	{Yi32, Ynone, Yi32, Zpseudo, 0},
    276 }
    277 
    278 var yxorb = []ytab{
    279 	{Yi32, Ynone, Yal, Zib_, 1},
    280 	{Yi32, Ynone, Ymb, Zibo_m, 2},
    281 	{Yrb, Ynone, Ymb, Zr_m, 1},
    282 	{Ymb, Ynone, Yrb, Zm_r, 1},
    283 }
    284 
    285 var yxorl = []ytab{
    286 	{Yi8, Ynone, Yml, Zibo_m, 2},
    287 	{Yi32, Ynone, Yax, Zil_, 1},
    288 	{Yi32, Ynone, Yml, Zilo_m, 2},
    289 	{Yrl, Ynone, Yml, Zr_m, 1},
    290 	{Yml, Ynone, Yrl, Zm_r, 1},
    291 }
    292 
    293 var yaddl = []ytab{
    294 	{Yi8, Ynone, Yml, Zibo_m, 2},
    295 	{Yi32, Ynone, Yax, Zil_, 1},
    296 	{Yi32, Ynone, Yml, Zilo_m, 2},
    297 	{Yrl, Ynone, Yml, Zr_m, 1},
    298 	{Yml, Ynone, Yrl, Zm_r, 1},
    299 }
    300 
    301 var yincb = []ytab{
    302 	{Ynone, Ynone, Ymb, Zo_m, 2},
    303 }
    304 
    305 var yincw = []ytab{
    306 	{Ynone, Ynone, Yml, Zo_m, 2},
    307 }
    308 
    309 var yincl = []ytab{
    310 	{Ynone, Ynone, Yrl, Z_rp, 1},
    311 	{Ynone, Ynone, Yml, Zo_m, 2},
    312 }
    313 
    314 var yincq = []ytab{
    315 	{Ynone, Ynone, Yml, Zo_m, 2},
    316 }
    317 
    318 var ycmpb = []ytab{
    319 	{Yal, Ynone, Yi32, Z_ib, 1},
    320 	{Ymb, Ynone, Yi32, Zm_ibo, 2},
    321 	{Ymb, Ynone, Yrb, Zm_r, 1},
    322 	{Yrb, Ynone, Ymb, Zr_m, 1},
    323 }
    324 
    325 var ycmpl = []ytab{
    326 	{Yml, Ynone, Yi8, Zm_ibo, 2},
    327 	{Yax, Ynone, Yi32, Z_il, 1},
    328 	{Yml, Ynone, Yi32, Zm_ilo, 2},
    329 	{Yml, Ynone, Yrl, Zm_r, 1},
    330 	{Yrl, Ynone, Yml, Zr_m, 1},
    331 }
    332 
    333 var yshb = []ytab{
    334 	{Yi1, Ynone, Ymb, Zo_m, 2},
    335 	{Yi32, Ynone, Ymb, Zibo_m, 2},
    336 	{Ycx, Ynone, Ymb, Zo_m, 2},
    337 }
    338 
    339 var yshl = []ytab{
    340 	{Yi1, Ynone, Yml, Zo_m, 2},
    341 	{Yi32, Ynone, Yml, Zibo_m, 2},
    342 	{Ycl, Ynone, Yml, Zo_m, 2},
    343 	{Ycx, Ynone, Yml, Zo_m, 2},
    344 }
    345 
    346 var ytestb = []ytab{
    347 	{Yi32, Ynone, Yal, Zib_, 1},
    348 	{Yi32, Ynone, Ymb, Zibo_m, 2},
    349 	{Yrb, Ynone, Ymb, Zr_m, 1},
    350 	{Ymb, Ynone, Yrb, Zm_r, 1},
    351 }
    352 
    353 var ytestl = []ytab{
    354 	{Yi32, Ynone, Yax, Zil_, 1},
    355 	{Yi32, Ynone, Yml, Zilo_m, 2},
    356 	{Yrl, Ynone, Yml, Zr_m, 1},
    357 	{Yml, Ynone, Yrl, Zm_r, 1},
    358 }
    359 
    360 var ymovb = []ytab{
    361 	{Yrb, Ynone, Ymb, Zr_m, 1},
    362 	{Ymb, Ynone, Yrb, Zm_r, 1},
    363 	{Yi32, Ynone, Yrb, Zib_rp, 1},
    364 	{Yi32, Ynone, Ymb, Zibo_m, 2},
    365 }
    366 
    367 var ymbs = []ytab{
    368 	{Ymb, Ynone, Ynone, Zm_o, 2},
    369 }
    370 
    371 var ybtl = []ytab{
    372 	{Yi8, Ynone, Yml, Zibo_m, 2},
    373 	{Yrl, Ynone, Yml, Zr_m, 1},
    374 }
    375 
    376 var ymovw = []ytab{
    377 	{Yrl, Ynone, Yml, Zr_m, 1},
    378 	{Yml, Ynone, Yrl, Zm_r, 1},
    379 	{Yi0, Ynone, Yrl, Zclr, 1},
    380 	{Yi32, Ynone, Yrl, Zil_rp, 1},
    381 	{Yi32, Ynone, Yml, Zilo_m, 2},
    382 	{Yiauto, Ynone, Yrl, Zaut_r, 2},
    383 }
    384 
    385 var ymovl = []ytab{
    386 	{Yrl, Ynone, Yml, Zr_m, 1},
    387 	{Yml, Ynone, Yrl, Zm_r, 1},
    388 	{Yi0, Ynone, Yrl, Zclr, 1},
    389 	{Yi32, Ynone, Yrl, Zil_rp, 1},
    390 	{Yi32, Ynone, Yml, Zilo_m, 2},
    391 	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
    392 	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
    393 	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
    394 	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
    395 	{Yiauto, Ynone, Yrl, Zaut_r, 2},
    396 }
    397 
    398 var yret = []ytab{
    399 	{Ynone, Ynone, Ynone, Zo_iw, 1},
    400 	{Yi32, Ynone, Ynone, Zo_iw, 1},
    401 }
    402 
    403 var ymovq = []ytab{
    404 	// valid in 32-bit mode
    405 	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
    406 	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
    407 	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
    408 	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
    409 	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
    410 
    411 	// valid only in 64-bit mode, usually with 64-bit prefix
    412 	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
    413 	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
    414 	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
    415 	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
    416 	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
    417 	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
    418 	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
    419 	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
    420 	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
    421 	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
    422 	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
    423 }
    424 
    425 var ym_rl = []ytab{
    426 	{Ym, Ynone, Yrl, Zm_r, 1},
    427 }
    428 
    429 var yrl_m = []ytab{
    430 	{Yrl, Ynone, Ym, Zr_m, 1},
    431 }
    432 
    433 var ymb_rl = []ytab{
    434 	{Ymb, Ynone, Yrl, Zmb_r, 1},
    435 }
    436 
    437 var yml_rl = []ytab{
    438 	{Yml, Ynone, Yrl, Zm_r, 1},
    439 }
    440 
    441 var yrl_ml = []ytab{
    442 	{Yrl, Ynone, Yml, Zr_m, 1},
    443 }
    444 
    445 var yml_mb = []ytab{
    446 	{Yrb, Ynone, Ymb, Zr_m, 1},
    447 	{Ymb, Ynone, Yrb, Zm_r, 1},
    448 }
    449 
    450 var yrb_mb = []ytab{
    451 	{Yrb, Ynone, Ymb, Zr_m, 1},
    452 }
    453 
    454 var yxchg = []ytab{
    455 	{Yax, Ynone, Yrl, Z_rp, 1},
    456 	{Yrl, Ynone, Yax, Zrp_, 1},
    457 	{Yrl, Ynone, Yml, Zr_m, 1},
    458 	{Yml, Ynone, Yrl, Zm_r, 1},
    459 }
    460 
    461 var ydivl = []ytab{
    462 	{Yml, Ynone, Ynone, Zm_o, 2},
    463 }
    464 
    465 var ydivb = []ytab{
    466 	{Ymb, Ynone, Ynone, Zm_o, 2},
    467 }
    468 
    469 var yimul = []ytab{
    470 	{Yml, Ynone, Ynone, Zm_o, 2},
    471 	{Yi8, Ynone, Yrl, Zib_rr, 1},
    472 	{Yi32, Ynone, Yrl, Zil_rr, 1},
    473 	{Yml, Ynone, Yrl, Zm_r, 2},
    474 }
    475 
    476 var yimul3 = []ytab{
    477 	{Yi8, Yml, Yrl, Zibm_r, 2},
    478 }
    479 
    480 var ybyte = []ytab{
    481 	{Yi64, Ynone, Ynone, Zbyte, 1},
    482 }
    483 
    484 var yin = []ytab{
    485 	{Yi32, Ynone, Ynone, Zib_, 1},
    486 	{Ynone, Ynone, Ynone, Zlit, 1},
    487 }
    488 
    489 var yint = []ytab{
    490 	{Yi32, Ynone, Ynone, Zib_, 1},
    491 }
    492 
    493 var ypushl = []ytab{
    494 	{Yrl, Ynone, Ynone, Zrp_, 1},
    495 	{Ym, Ynone, Ynone, Zm_o, 2},
    496 	{Yi8, Ynone, Ynone, Zib_, 1},
    497 	{Yi32, Ynone, Ynone, Zil_, 1},
    498 }
    499 
    500 var ypopl = []ytab{
    501 	{Ynone, Ynone, Yrl, Z_rp, 1},
    502 	{Ynone, Ynone, Ym, Zo_m, 2},
    503 }
    504 
    505 var ybswap = []ytab{
    506 	{Ynone, Ynone, Yrl, Z_rp, 2},
    507 }
    508 
    509 var yscond = []ytab{
    510 	{Ynone, Ynone, Ymb, Zo_m, 2},
    511 }
    512 
    513 var yjcond = []ytab{
    514 	{Ynone, Ynone, Ybr, Zbr, 0},
    515 	{Yi0, Ynone, Ybr, Zbr, 0},
    516 	{Yi1, Ynone, Ybr, Zbr, 1},
    517 }
    518 
    519 var yloop = []ytab{
    520 	{Ynone, Ynone, Ybr, Zloop, 1},
    521 }
    522 
    523 var ycall = []ytab{
    524 	{Ynone, Ynone, Yml, Zcallindreg, 0},
    525 	{Yrx, Ynone, Yrx, Zcallindreg, 2},
    526 	{Ynone, Ynone, Yindir, Zcallind, 2},
    527 	{Ynone, Ynone, Ybr, Zcall, 0},
    528 	{Ynone, Ynone, Yi32, Zcallcon, 1},
    529 }
    530 
    531 var yduff = []ytab{
    532 	{Ynone, Ynone, Yi32, Zcallduff, 1},
    533 }
    534 
    535 var yjmp = []ytab{
    536 	{Ynone, Ynone, Yml, Zo_m64, 2},
    537 	{Ynone, Ynone, Ybr, Zjmp, 0},
    538 	{Ynone, Ynone, Yi32, Zjmpcon, 1},
    539 }
    540 
    541 var yfmvd = []ytab{
    542 	{Ym, Ynone, Yf0, Zm_o, 2},
    543 	{Yf0, Ynone, Ym, Zo_m, 2},
    544 	{Yrf, Ynone, Yf0, Zm_o, 2},
    545 	{Yf0, Ynone, Yrf, Zo_m, 2},
    546 }
    547 
    548 var yfmvdp = []ytab{
    549 	{Yf0, Ynone, Ym, Zo_m, 2},
    550 	{Yf0, Ynone, Yrf, Zo_m, 2},
    551 }
    552 
    553 var yfmvf = []ytab{
    554 	{Ym, Ynone, Yf0, Zm_o, 2},
    555 	{Yf0, Ynone, Ym, Zo_m, 2},
    556 }
    557 
    558 var yfmvx = []ytab{
    559 	{Ym, Ynone, Yf0, Zm_o, 2},
    560 }
    561 
    562 var yfmvp = []ytab{
    563 	{Yf0, Ynone, Ym, Zo_m, 2},
    564 }
    565 
    566 var yfcmv = []ytab{
    567 	{Yrf, Ynone, Yf0, Zm_o, 2},
    568 }
    569 
    570 var yfadd = []ytab{
    571 	{Ym, Ynone, Yf0, Zm_o, 2},
    572 	{Yrf, Ynone, Yf0, Zm_o, 2},
    573 	{Yf0, Ynone, Yrf, Zo_m, 2},
    574 }
    575 
    576 var yfaddp = []ytab{
    577 	{Yf0, Ynone, Yrf, Zo_m, 2},
    578 }
    579 
    580 var yfxch = []ytab{
    581 	{Yf0, Ynone, Yrf, Zo_m, 2},
    582 	{Yrf, Ynone, Yf0, Zm_o, 2},
    583 }
    584 
    585 var ycompp = []ytab{
    586 	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
    587 }
    588 
    589 var ystsw = []ytab{
    590 	{Ynone, Ynone, Ym, Zo_m, 2},
    591 	{Ynone, Ynone, Yax, Zlit, 1},
    592 }
    593 
    594 var ystcw = []ytab{
    595 	{Ynone, Ynone, Ym, Zo_m, 2},
    596 	{Ym, Ynone, Ynone, Zm_o, 2},
    597 }
    598 
    599 var ysvrs = []ytab{
    600 	{Ynone, Ynone, Ym, Zo_m, 2},
    601 	{Ym, Ynone, Ynone, Zm_o, 2},
    602 }
    603 
    604 var ymm = []ytab{
    605 	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
    606 	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
    607 }
    608 
    609 var yxm = []ytab{
    610 	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
    611 }
    612 
    613 var yxcvm1 = []ytab{
    614 	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
    615 	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
    616 }
    617 
    618 var yxcvm2 = []ytab{
    619 	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
    620 	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
    621 }
    622 
    623 /*
    624 var yxmq = []ytab{
    625 	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
    626 }
    627 */
    628 
    629 var yxr = []ytab{
    630 	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
    631 }
    632 
    633 var yxr_ml = []ytab{
    634 	{Yxr, Ynone, Yml, Zr_m_xm, 1},
    635 }
    636 
    637 var ymr = []ytab{
    638 	{Ymr, Ynone, Ymr, Zm_r, 1},
    639 }
    640 
    641 var ymr_ml = []ytab{
    642 	{Ymr, Ynone, Yml, Zr_m_xm, 1},
    643 }
    644 
    645 var yxcmp = []ytab{
    646 	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
    647 }
    648 
    649 var yxcmpi = []ytab{
    650 	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
    651 }
    652 
    653 var yxmov = []ytab{
    654 	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
    655 	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
    656 }
    657 
    658 var yxcvfl = []ytab{
    659 	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
    660 }
    661 
    662 var yxcvlf = []ytab{
    663 	{Yml, Ynone, Yxr, Zm_r_xm, 1},
    664 }
    665 
    666 var yxcvfq = []ytab{
    667 	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
    668 }
    669 
    670 var yxcvqf = []ytab{
    671 	{Yml, Ynone, Yxr, Zm_r_xm, 2},
    672 }
    673 
    674 var yps = []ytab{
    675 	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
    676 	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
    677 	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
    678 	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
    679 }
    680 
    681 var yxrrl = []ytab{
    682 	{Yxr, Ynone, Yrl, Zm_r, 1},
    683 }
    684 
    685 var ymfp = []ytab{
    686 	{Ymm, Ynone, Ymr, Zm_r_3d, 1},
    687 }
    688 
    689 var ymrxr = []ytab{
    690 	{Ymr, Ynone, Yxr, Zm_r, 1},
    691 	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
    692 }
    693 
    694 var ymshuf = []ytab{
    695 	{Yi8, Ymm, Ymr, Zibm_r, 2},
    696 }
    697 
    698 var ymshufb = []ytab{
    699 	{Yxm, Ynone, Yxr, Zm2_r, 2},
    700 }
    701 
    702 var yxshuf = []ytab{
    703 	{Yu8, Yxm, Yxr, Zibm_r, 2},
    704 }
    705 
    706 var yextrw = []ytab{
    707 	{Yu8, Yxr, Yrl, Zibm_r, 2},
    708 }
    709 
    710 var yinsrw = []ytab{
    711 	{Yu8, Yml, Yxr, Zibm_r, 2},
    712 }
    713 
    714 var yinsr = []ytab{
    715 	{Yu8, Ymm, Yxr, Zibm_r, 3},
    716 }
    717 
    718 var ypsdq = []ytab{
    719 	{Yi8, Ynone, Yxr, Zibo_m, 2},
    720 }
    721 
    722 var ymskb = []ytab{
    723 	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
    724 	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
    725 }
    726 
    727 var ycrc32l = []ytab{
    728 	{Yml, Ynone, Yrl, Zlitm_r, 0},
    729 }
    730 
    731 var yprefetch = []ytab{
    732 	{Ym, Ynone, Ynone, Zm_o, 2},
    733 }
    734 
    735 var yaes = []ytab{
    736 	{Yxm, Ynone, Yxr, Zlitm_r, 2},
    737 }
    738 
    739 var yaes2 = []ytab{
    740 	{Yu8, Yxm, Yxr, Zibm_r, 2},
    741 }
    742 
    743 /*
 * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
 * and p->from and p->to as operands (Addr*).  The linker scans optab to find
 * the entry with the given p->as and then looks through the ytable for that
 * instruction (the second field in the optab struct) for a line whose from
 * and to fields match the Ytypes of the p->from and p->to operands.  The function
    749  * oclass in span.c computes the specific Ytype of an operand and then the set
    750  * of more general Ytypes that it satisfies is implied by the ycover table, set
    751  * up in instinit.  For example, oclass distinguishes the constants 0 and 1
    752  * from the more general 8-bit constants, but instinit says
    753  *
    754  *        ycover[Yi0*Ymax + Ys32] = 1;
    755  *        ycover[Yi1*Ymax + Ys32] = 1;
    756  *        ycover[Yi8*Ymax + Ys32] = 1;
    757  *
    758  * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
    759  * if that's what an instruction can handle.
    760  *
 * In parallel with the scan through the ytable for the appropriate line, there
 * is a z pointer that starts out pointing at the strange magic byte list (the
 * op field) in the Optab struct.  With each step past a non-matching ytable
 * line, z advances by that line's zoffset field.  When a matching line is
 * found, that z pointer has the extra data to use in laying down the
 * instruction bytes.  The actual bytes laid down are a function of the line's
 * zcase field (that is, the Ztype) and the z bytes.
    768  *
 * For example, let's look at AADDL.  The optab line says:
 *        Optab{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
 *
 * and yaddl says (columns are from, from3, to, zcase, zoffset)
 *        var yaddl = []ytab{
 *                {Yi8, Ynone, Yml, Zibo_m, 2},
 *                {Yi32, Ynone, Yax, Zil_, 1},
 *                {Yi32, Ynone, Yml, Zilo_m, 2},
 *                {Yrl, Ynone, Yml, Zr_m, 1},
 *                {Yml, Ynone, Yrl, Zm_r, 1},
 *        }
    782  *
    783  * so there are 5 possible types of ADDL instruction that can be laid down, and
    784  * possible states used to lay them down (Ztype and z pointer, assuming z
    785  * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
    786  *
    787  *        Yi8, Yml -> Zibo_m, z (0x83, 00)
    788  *        Yi32, Yax -> Zil_, z+2 (0x05)
    789  *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
    790  *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
    791  *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
    792  *
    793  * The Pconstant in the optab line controls the prefix bytes to emit.  That's
    794  * relatively straightforward as this program goes.
    795  *
 * The switch on the matching line's zcase (its Ztype) in doasm implements the
 * various Z cases.  Zibo_m, for example, is an opcode byte (z[0]) then an
 * asmando (which is some kind of encoded addressing mode for the Yml arg), and
 * then a single immediate byte.  Zilo_m is the same but a long (32-bit)
 * immediate.
    800  */
    801 var optab =
    802 /*	as, ytab, andproto, opcode */
    803 []Optab{
    804 	Optab{obj.AXXX, nil, 0, [23]uint8{}},
    805 	Optab{AAAA, ynone, P32, [23]uint8{0x37}},
    806 	Optab{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
    807 	Optab{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
    808 	Optab{AAAS, ynone, P32, [23]uint8{0x3f}},
    809 	Optab{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
    810 	Optab{AADCL, yxorl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
    811 	Optab{AADCQ, yxorl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
    812 	Optab{AADCW, yxorl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
    813 	Optab{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
    814 	Optab{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
    815 	Optab{AADDPD, yxm, Pq, [23]uint8{0x58}},
    816 	Optab{AADDPS, yxm, Pm, [23]uint8{0x58}},
    817 	Optab{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
    818 	Optab{AADDSD, yxm, Pf2, [23]uint8{0x58}},
    819 	Optab{AADDSS, yxm, Pf3, [23]uint8{0x58}},
    820 	Optab{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
    821 	Optab{AADJSP, nil, 0, [23]uint8{}},
    822 	Optab{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
    823 	Optab{AANDL, yxorl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
    824 	Optab{AANDNPD, yxm, Pq, [23]uint8{0x55}},
    825 	Optab{AANDNPS, yxm, Pm, [23]uint8{0x55}},
    826 	Optab{AANDPD, yxm, Pq, [23]uint8{0x54}},
    827 	Optab{AANDPS, yxm, Pq, [23]uint8{0x54}},
    828 	Optab{AANDQ, yxorl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
    829 	Optab{AANDW, yxorl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
    830 	Optab{AARPL, yrl_ml, P32, [23]uint8{0x63}},
    831 	Optab{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
    832 	Optab{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
    833 	Optab{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
    834 	Optab{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
    835 	Optab{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
    836 	Optab{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
    837 	Optab{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
    838 	Optab{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
    839 	Optab{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
    840 	Optab{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
    841 	Optab{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
    842 	Optab{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
    843 	Optab{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
    844 	Optab{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
    845 	Optab{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
    846 	Optab{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
    847 	Optab{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
    848 	Optab{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
    849 	Optab{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
    850 	Optab{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
    851 	Optab{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
    852 	Optab{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
    853 	Optab{ABYTE, ybyte, Px, [23]uint8{1}},
    854 	Optab{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
    855 	Optab{ACDQ, ynone, Px, [23]uint8{0x99}},
    856 	Optab{ACLC, ynone, Px, [23]uint8{0xf8}},
    857 	Optab{ACLD, ynone, Px, [23]uint8{0xfc}},
    858 	Optab{ACLI, ynone, Px, [23]uint8{0xfa}},
    859 	Optab{ACLTS, ynone, Pm, [23]uint8{0x06}},
    860 	Optab{ACMC, ynone, Px, [23]uint8{0xf5}},
    861 	Optab{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
    862 	Optab{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
    863 	Optab{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
    864 	Optab{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
    865 	Optab{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
    866 	Optab{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
    867 	Optab{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
    868 	Optab{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
    869 	Optab{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
    870 	Optab{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
    871 	Optab{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
    872 	Optab{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
    873 	Optab{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
    874 	Optab{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
    875 	Optab{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
    876 	Optab{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
    877 	Optab{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
    878 	Optab{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
    879 	Optab{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
    880 	Optab{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
    881 	Optab{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
    882 	Optab{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
    883 	Optab{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
    884 	Optab{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
    885 	Optab{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
    886 	Optab{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
    887 	Optab{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
    888 	Optab{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
    889 	Optab{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
    890 	Optab{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
    891 	Optab{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
    892 	Optab{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
    893 	Optab{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
    894 	Optab{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
    895 	Optab{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
    896 	Optab{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
    897 	Optab{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
    898 	Optab{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
    899 	Optab{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
    900 	Optab{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
    901 	Optab{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
    902 	Optab{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
    903 	Optab{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
    904 	Optab{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
    905 	Optab{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
    906 	Optab{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
    907 	Optab{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
    908 	Optab{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
    909 	Optab{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
    910 	Optab{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
    911 	Optab{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
    912 	Optab{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
    913 	Optab{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
    914 	Optab{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
    915 	Optab{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
    916 	Optab{ACMPSL, ynone, Px, [23]uint8{0xa7}},
    917 	Optab{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
    918 	Optab{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
    919 	Optab{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
    920 	Optab{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
    921 	Optab{ACOMISD, yxcmp, Pe, [23]uint8{0x2f}},
    922 	Optab{ACOMISS, yxcmp, Pm, [23]uint8{0x2f}},
    923 	Optab{ACPUID, ynone, Pm, [23]uint8{0xa2}},
    924 	Optab{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
    925 	Optab{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
    926 	Optab{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
    927 	Optab{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
    928 	Optab{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
    929 	Optab{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
    930 	Optab{API2FW, ymfp, Px, [23]uint8{0x0c}},
    931 	Optab{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
    932 	Optab{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
    933 	Optab{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
    934 	Optab{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
    935 	Optab{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
    936 	Optab{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
    937 	Optab{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
    938 	Optab{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
    939 	Optab{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
    940 	Optab{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
    941 	Optab{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
    942 	Optab{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
    943 	Optab{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
    944 	Optab{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
    945 	Optab{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
    946 	Optab{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
    947 	Optab{ACWD, ynone, Pe, [23]uint8{0x99}},
    948 	Optab{ACQO, ynone, Pw, [23]uint8{0x99}},
    949 	Optab{ADAA, ynone, P32, [23]uint8{0x27}},
    950 	Optab{ADAS, ynone, P32, [23]uint8{0x2f}},
    951 	Optab{obj.ADATA, nil, 0, [23]uint8{}},
    952 	Optab{ADECB, yincb, Pb, [23]uint8{0xfe, 01}},
    953 	Optab{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
    954 	Optab{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
    955 	Optab{ADECW, yincw, Pe, [23]uint8{0xff, 01}},
    956 	Optab{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
    957 	Optab{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
    958 	Optab{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
    959 	Optab{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
    960 	Optab{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
    961 	Optab{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
    962 	Optab{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
    963 	Optab{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
    964 	Optab{AEMMS, ynone, Pm, [23]uint8{0x77}},
    965 	Optab{AENTER, nil, 0, [23]uint8{}}, /* botch */
    966 	Optab{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
    967 	Optab{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
    968 	Optab{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
    969 	Optab{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
    970 	Optab{obj.AGLOBL, nil, 0, [23]uint8{}},
    971 	Optab{AHLT, ynone, Px, [23]uint8{0xf4}},
    972 	Optab{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
    973 	Optab{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
    974 	Optab{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
    975 	Optab{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
    976 	Optab{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
    977 	Optab{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
    978 	Optab{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
    979 	Optab{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
    980 	Optab{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
    981 	Optab{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
    982 	Optab{AINCB, yincb, Pb, [23]uint8{0xfe, 00}},
    983 	Optab{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
    984 	Optab{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
    985 	Optab{AINCW, yincw, Pe, [23]uint8{0xff, 00}},
    986 	Optab{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
    987 	Optab{AINSB, ynone, Pb, [23]uint8{0x6c}},
    988 	Optab{AINSL, ynone, Px, [23]uint8{0x6d}},
    989 	Optab{AINSW, ynone, Pe, [23]uint8{0x6d}},
    990 	Optab{AINT, yint, Px, [23]uint8{0xcd}},
    991 	Optab{AINTO, ynone, P32, [23]uint8{0xce}},
    992 	Optab{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
    993 	Optab{AIRETL, ynone, Px, [23]uint8{0xcf}},
    994 	Optab{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
    995 	Optab{AIRETW, ynone, Pe, [23]uint8{0xcf}},
    996 	Optab{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
    997 	Optab{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
    998 	Optab{AJCXZL, yloop, Px, [23]uint8{0xe3}},
    999 	Optab{AJCXZW, yloop, Px, [23]uint8{0xe3}},
   1000 	Optab{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
   1001 	Optab{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
   1002 	Optab{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
   1003 	Optab{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
   1004 	Optab{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
   1005 	Optab{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
   1006 	Optab{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
   1007 	Optab{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
   1008 	Optab{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
   1009 	Optab{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
   1010 	Optab{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
   1011 	Optab{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
   1012 	Optab{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
   1013 	Optab{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
   1014 	Optab{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
   1015 	Optab{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
   1016 	Optab{ALAHF, ynone, Px, [23]uint8{0x9f}},
   1017 	Optab{ALARL, yml_rl, Pm, [23]uint8{0x02}},
   1018 	Optab{ALARW, yml_rl, Pq, [23]uint8{0x02}},
   1019 	Optab{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
   1020 	Optab{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
   1021 	Optab{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
   1022 	Optab{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
   1023 	Optab{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
   1024 	Optab{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
   1025 	Optab{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
   1026 	Optab{ALOCK, ynone, Px, [23]uint8{0xf0}},
   1027 	Optab{ALODSB, ynone, Pb, [23]uint8{0xac}},
   1028 	Optab{ALODSL, ynone, Px, [23]uint8{0xad}},
   1029 	Optab{ALODSQ, ynone, Pw, [23]uint8{0xad}},
   1030 	Optab{ALODSW, ynone, Pe, [23]uint8{0xad}},
   1031 	Optab{ALONG, ybyte, Px, [23]uint8{4}},
   1032 	Optab{ALOOP, yloop, Px, [23]uint8{0xe2}},
   1033 	Optab{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
   1034 	Optab{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
   1035 	Optab{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
   1036 	Optab{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
   1037 	Optab{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
   1038 	Optab{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
   1039 	Optab{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
   1040 	Optab{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
   1041 	Optab{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
   1042 	Optab{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
   1043 	Optab{AMINPD, yxm, Pe, [23]uint8{0x5d}},
   1044 	Optab{AMINPS, yxm, Pm, [23]uint8{0x5d}},
   1045 	Optab{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
   1046 	Optab{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
   1047 	Optab{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
   1048 	Optab{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
   1049 	Optab{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
   1050 	Optab{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
   1051 	Optab{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
   1052 	Optab{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
   1053 	Optab{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
   1054 	Optab{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
   1055 	Optab{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
   1056 	Optab{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
   1057 	Optab{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
   1058 	Optab{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
   1059 	Optab{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
   1060 	Optab{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
   1061 	Optab{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
   1062 	Optab{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
   1063 	Optab{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
   1064 	Optab{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
   1065 	Optab{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
   1066 	Optab{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
   1067 	Optab{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
   1068 	Optab{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
   1069 	Optab{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
   1070 	Optab{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
   1071 	Optab{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
   1072 	Optab{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
   1073 	Optab{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
   1074 	Optab{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
   1075 	Optab{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
   1076 	Optab{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
   1077 	Optab{AMOVSL, ynone, Px, [23]uint8{0xa5}},
   1078 	Optab{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
   1079 	Optab{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
   1080 	Optab{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
   1081 	Optab{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
   1082 	Optab{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
   1083 	Optab{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
   1084 	Optab{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
   1085 	Optab{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
   1086 	Optab{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
   1087 	Optab{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
   1088 	Optab{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
   1089 	Optab{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
   1090 	Optab{AMULPD, yxm, Pe, [23]uint8{0x59}},
   1091 	Optab{AMULPS, yxm, Ym, [23]uint8{0x59}},
   1092 	Optab{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
   1093 	Optab{AMULSD, yxm, Pf2, [23]uint8{0x59}},
   1094 	Optab{AMULSS, yxm, Pf3, [23]uint8{0x59}},
   1095 	Optab{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
   1096 	Optab{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
   1097 	Optab{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
   1098 	Optab{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
   1099 	Optab{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
   1100 	Optab{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
   1101 	Optab{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
   1102 	Optab{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
   1103 	Optab{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
   1104 	Optab{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
   1105 	Optab{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
   1106 	Optab{AORL, yxorl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
   1107 	Optab{AORPD, yxm, Pq, [23]uint8{0x56}},
   1108 	Optab{AORPS, yxm, Pm, [23]uint8{0x56}},
   1109 	Optab{AORQ, yxorl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
   1110 	Optab{AORW, yxorl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
   1111 	Optab{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
   1112 	Optab{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
   1113 	Optab{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
   1114 	Optab{AOUTSL, ynone, Px, [23]uint8{0x6f}},
   1115 	Optab{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
   1116 	Optab{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
   1117 	Optab{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
   1118 	Optab{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
   1119 	Optab{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
   1120 	Optab{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
   1121 	Optab{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
   1122 	Optab{APADDQ, yxm, Pe, [23]uint8{0xd4}},
   1123 	Optab{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
   1124 	Optab{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
   1125 	Optab{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
   1126 	Optab{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
   1127 	Optab{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
   1128 	Optab{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
   1129 	Optab{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
   1130 	Optab{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
   1131 	Optab{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
   1132 	Optab{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
   1133 	Optab{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
   1134 	Optab{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
   1135 	Optab{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
   1136 	Optab{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
   1137 	Optab{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
   1138 	Optab{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
   1139 	Optab{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
   1140 	Optab{APF2IL, ymfp, Px, [23]uint8{0x1d}},
   1141 	Optab{APF2IW, ymfp, Px, [23]uint8{0x1c}},
   1142 	Optab{API2FL, ymfp, Px, [23]uint8{0x0d}},
   1143 	Optab{APFACC, ymfp, Px, [23]uint8{0xae}},
   1144 	Optab{APFADD, ymfp, Px, [23]uint8{0x9e}},
   1145 	Optab{APFCMPEQ, ymfp, Px, [23]uint8{0xb0}},
   1146 	Optab{APFCMPGE, ymfp, Px, [23]uint8{0x90}},
   1147 	Optab{APFCMPGT, ymfp, Px, [23]uint8{0xa0}},
   1148 	Optab{APFMAX, ymfp, Px, [23]uint8{0xa4}},
   1149 	Optab{APFMIN, ymfp, Px, [23]uint8{0x94}},
   1150 	Optab{APFMUL, ymfp, Px, [23]uint8{0xb4}},
   1151 	Optab{APFNACC, ymfp, Px, [23]uint8{0x8a}},
   1152 	Optab{APFPNACC, ymfp, Px, [23]uint8{0x8e}},
   1153 	Optab{APFRCP, ymfp, Px, [23]uint8{0x96}},
   1154 	Optab{APFRCPIT1, ymfp, Px, [23]uint8{0xa6}},
   1155 	Optab{APFRCPI2T, ymfp, Px, [23]uint8{0xb6}},
   1156 	Optab{APFRSQIT1, ymfp, Px, [23]uint8{0xa7}},
   1157 	Optab{APFRSQRT, ymfp, Px, [23]uint8{0x97}},
   1158 	Optab{APFSUB, ymfp, Px, [23]uint8{0x9a}},
   1159 	Optab{APFSUBR, ymfp, Px, [23]uint8{0xaa}},
   1160 	Optab{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
   1161 	Optab{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
   1162 	Optab{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
   1163 	Optab{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
   1164 	Optab{APMAXSW, yxm, Pe, [23]uint8{0xee}},
   1165 	Optab{APMAXUB, yxm, Pe, [23]uint8{0xde}},
   1166 	Optab{APMINSW, yxm, Pe, [23]uint8{0xea}},
   1167 	Optab{APMINUB, yxm, Pe, [23]uint8{0xda}},
   1168 	Optab{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
   1169 	Optab{APMULHRW, ymfp, Px, [23]uint8{0xb7}},
   1170 	Optab{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
   1171 	Optab{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
   1172 	Optab{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
   1173 	Optab{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
   1174 	Optab{APOPAL, ynone, P32, [23]uint8{0x61}},
   1175 	Optab{APOPAW, ynone, Pe, [23]uint8{0x61}},
   1176 	Optab{APOPFL, ynone, P32, [23]uint8{0x9d}},
   1177 	Optab{APOPFQ, ynone, Py, [23]uint8{0x9d}},
   1178 	Optab{APOPFW, ynone, Pe, [23]uint8{0x9d}},
   1179 	Optab{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
   1180 	Optab{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
   1181 	Optab{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
   1182 	Optab{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
   1183 	Optab{APSADBW, yxm, Pq, [23]uint8{0xf6}},
   1184 	Optab{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
   1185 	Optab{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
   1186 	Optab{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
   1187 	Optab{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
   1188 	Optab{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
   1189 	Optab{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
   1190 	Optab{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
   1191 	Optab{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
   1192 	Optab{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
   1193 	Optab{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
   1194 	Optab{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
   1195 	Optab{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
   1196 	Optab{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
   1197 	Optab{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
   1198 	Optab{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xe1, Pe, 0x71, 02}},
   1199 	Optab{APSUBB, yxm, Pe, [23]uint8{0xf8}},
   1200 	Optab{APSUBL, yxm, Pe, [23]uint8{0xfa}},
   1201 	Optab{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
   1202 	Optab{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
   1203 	Optab{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
   1204 	Optab{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
   1205 	Optab{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
   1206 	Optab{APSUBW, yxm, Pe, [23]uint8{0xf9}},
   1207 	Optab{APSWAPL, ymfp, Px, [23]uint8{0xbb}},
   1208 	Optab{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
   1209 	Optab{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
   1210 	Optab{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
   1211 	Optab{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
   1212 	Optab{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
   1213 	Optab{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
   1214 	Optab{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
   1215 	Optab{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
   1216 	Optab{APUSHAL, ynone, P32, [23]uint8{0x60}},
   1217 	Optab{APUSHAW, ynone, Pe, [23]uint8{0x60}},
   1218 	Optab{APUSHFL, ynone, P32, [23]uint8{0x9c}},
   1219 	Optab{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
   1220 	Optab{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
   1221 	Optab{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
   1222 	Optab{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
   1223 	Optab{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
   1224 	Optab{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
   1225 	Optab{AQUAD, ybyte, Px, [23]uint8{8}},
   1226 	Optab{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
   1227 	Optab{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
   1228 	Optab{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
   1229 	Optab{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
   1230 	Optab{ARCPPS, yxm, Pm, [23]uint8{0x53}},
   1231 	Optab{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
   1232 	Optab{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
   1233 	Optab{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
   1234 	Optab{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
   1235 	Optab{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
   1236 	Optab{AREP, ynone, Px, [23]uint8{0xf3}},
   1237 	Optab{AREPN, ynone, Px, [23]uint8{0xf2}},
   1238 	Optab{obj.ARET, ynone, Px, [23]uint8{0xc3}},
   1239 	Optab{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
   1240 	Optab{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
   1241 	Optab{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
   1242 	Optab{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
   1243 	Optab{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
   1244 	Optab{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
   1245 	Optab{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
   1246 	Optab{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
   1247 	Optab{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
   1248 	Optab{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
   1249 	Optab{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
   1250 	Optab{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
   1251 	Optab{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
   1252 	Optab{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
   1253 	Optab{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
   1254 	Optab{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1255 	Optab{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1256 	Optab{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1257 	Optab{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
   1258 	Optab{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
   1259 	Optab{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
   1260 	Optab{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
   1261 	Optab{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
   1262 	Optab{ASBBL, yxorl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
   1263 	Optab{ASBBQ, yxorl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
   1264 	Optab{ASBBW, yxorl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
   1265 	Optab{ASCASB, ynone, Pb, [23]uint8{0xae}},
   1266 	Optab{ASCASL, ynone, Px, [23]uint8{0xaf}},
   1267 	Optab{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
   1268 	Optab{ASCASW, ynone, Pe, [23]uint8{0xaf}},
   1269 	Optab{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
   1270 	Optab{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
   1271 	Optab{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
   1272 	Optab{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
   1273 	Optab{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
   1274 	Optab{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
   1275 	Optab{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
   1276 	Optab{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
   1277 	Optab{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
   1278 	Optab{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
   1279 	Optab{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
   1280 	Optab{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
   1281 	Optab{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
   1282 	Optab{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
   1283 	Optab{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
   1284 	Optab{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
   1285 	Optab{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
   1286 	Optab{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1287 	Optab{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1288 	Optab{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1289 	Optab{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
   1290 	Optab{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
   1291 	Optab{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
   1292 	Optab{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
   1293 	Optab{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
   1294 	Optab{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
   1295 	Optab{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
   1296 	Optab{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
   1297 	Optab{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
   1298 	Optab{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
   1299 	Optab{ASTC, ynone, Px, [23]uint8{0xf9}},
   1300 	Optab{ASTD, ynone, Px, [23]uint8{0xfd}},
   1301 	Optab{ASTI, ynone, Px, [23]uint8{0xfb}},
   1302 	Optab{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
   1303 	Optab{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
   1304 	Optab{ASTOSL, ynone, Px, [23]uint8{0xab}},
   1305 	Optab{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
   1306 	Optab{ASTOSW, ynone, Pe, [23]uint8{0xab}},
   1307 	Optab{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
   1308 	Optab{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
   1309 	Optab{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
   1310 	Optab{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
   1311 	Optab{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
   1312 	Optab{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
   1313 	Optab{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
   1314 	Optab{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
   1315 	Optab{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
   1316 	Optab{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
   1317 	Optab{ATESTB, ytestb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
   1318 	Optab{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
   1319 	Optab{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
   1320 	Optab{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
   1321 	Optab{obj.ATEXT, ytext, Px, [23]uint8{}},
   1322 	Optab{AUCOMISD, yxcmp, Pe, [23]uint8{0x2e}},
   1323 	Optab{AUCOMISS, yxcmp, Pm, [23]uint8{0x2e}},
   1324 	Optab{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
   1325 	Optab{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
   1326 	Optab{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
   1327 	Optab{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
   1328 	Optab{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
   1329 	Optab{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
   1330 	Optab{AWAIT, ynone, Px, [23]uint8{0x9b}},
   1331 	Optab{AWORD, ybyte, Px, [23]uint8{2}},
   1332 	Optab{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
   1333 	Optab{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
   1334 	Optab{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
   1335 	Optab{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
   1336 	Optab{AXLAT, ynone, Px, [23]uint8{0xd7}},
   1337 	Optab{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
   1338 	Optab{AXORL, yxorl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
   1339 	Optab{AXORPD, yxm, Pe, [23]uint8{0x57}},
   1340 	Optab{AXORPS, yxm, Pm, [23]uint8{0x57}},
   1341 	Optab{AXORQ, yxorl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
   1342 	Optab{AXORW, yxorl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
   1343 	Optab{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
   1344 	Optab{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
   1345 	Optab{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
   1346 	Optab{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
   1347 	Optab{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
   1348 	Optab{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
   1349 	Optab{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
   1350 	Optab{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
   1351 	Optab{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
   1352 	Optab{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
   1353 	Optab{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
   1354 	Optab{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
   1355 	Optab{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
   1356 	Optab{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
   1357 	Optab{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
   1358 	Optab{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
   1359 	Optab{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
   1360 	Optab{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
   1361 	Optab{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
   1362 	Optab{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
   1363 	Optab{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
   1364 	Optab{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
   1365 	Optab{AFCOMB, nil, 0, [23]uint8{}},
   1366 	Optab{AFCOMBP, nil, 0, [23]uint8{}},
   1367 	Optab{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
   1368 	Optab{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
   1369 	Optab{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
   1370 	Optab{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
   1371 	Optab{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
   1372 	Optab{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
   1373 	Optab{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
   1374 	Optab{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
   1375 	Optab{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
   1376 	Optab{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
   1377 	Optab{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
   1378 	Optab{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
   1379 	Optab{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
   1380 	Optab{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
   1381 	Optab{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
   1382 	Optab{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
   1383 	Optab{AFADDDP, yfaddp, Px, [23]uint8{0xde, 00}},
   1384 	Optab{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
   1385 	Optab{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
   1386 	Optab{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
   1387 	Optab{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
   1388 	Optab{AFMULDP, yfaddp, Px, [23]uint8{0xde, 01}},
   1389 	Optab{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
   1390 	Optab{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
   1391 	Optab{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
   1392 	Optab{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
   1393 	Optab{AFSUBDP, yfaddp, Px, [23]uint8{0xde, 05}},
   1394 	Optab{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
   1395 	Optab{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
   1396 	Optab{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
   1397 	Optab{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
   1398 	Optab{AFSUBRDP, yfaddp, Px, [23]uint8{0xde, 04}},
   1399 	Optab{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
   1400 	Optab{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
   1401 	Optab{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
   1402 	Optab{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
   1403 	Optab{AFDIVDP, yfaddp, Px, [23]uint8{0xde, 07}},
   1404 	Optab{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
   1405 	Optab{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
   1406 	Optab{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
   1407 	Optab{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
   1408 	Optab{AFDIVRDP, yfaddp, Px, [23]uint8{0xde, 06}},
   1409 	Optab{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
   1410 	Optab{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
   1411 	Optab{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
   1412 	Optab{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
   1413 	Optab{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
   1414 	Optab{AFFREE, nil, 0, [23]uint8{}},
   1415 	Optab{AFLDCW, ystcw, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
   1416 	Optab{AFLDENV, ystcw, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
   1417 	Optab{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
   1418 	Optab{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
   1419 	Optab{AFSTCW, ystcw, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
   1420 	Optab{AFSTENV, ystcw, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
   1421 	Optab{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
   1422 	Optab{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
   1423 	Optab{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
   1424 	Optab{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
   1425 	Optab{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
   1426 	Optab{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
   1427 	Optab{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
   1428 	Optab{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
   1429 	Optab{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
   1430 	Optab{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
   1431 	Optab{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
   1432 	Optab{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
   1433 	Optab{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
   1434 	Optab{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
   1435 	Optab{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
   1436 	Optab{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
   1437 	Optab{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
   1438 	Optab{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
   1439 	Optab{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
   1440 	Optab{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
   1441 	Optab{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
   1442 	Optab{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
   1443 	Optab{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
   1444 	Optab{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
   1445 	Optab{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
   1446 	Optab{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
   1447 	Optab{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
   1448 	Optab{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
   1449 	Optab{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
   1450 	Optab{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
   1451 	Optab{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
   1452 	Optab{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
   1453 	Optab{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
   1454 	Optab{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
   1455 	Optab{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
   1456 	Optab{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
   1457 	Optab{AINVD, ynone, Pm, [23]uint8{0x08}},
   1458 	Optab{AINVLPG, ymbs, Pm, [23]uint8{0x01, 07}},
   1459 	Optab{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
   1460 	Optab{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
   1461 	Optab{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
   1462 	Optab{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
   1463 	Optab{ARDMSR, ynone, Pm, [23]uint8{0x32}},
   1464 	Optab{ARDPMC, ynone, Pm, [23]uint8{0x33}},
   1465 	Optab{ARDTSC, ynone, Pm, [23]uint8{0x31}},
   1466 	Optab{ARSM, ynone, Pm, [23]uint8{0xaa}},
   1467 	Optab{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
   1468 	Optab{ASYSRET, ynone, Pm, [23]uint8{0x07}},
   1469 	Optab{AWBINVD, ynone, Pm, [23]uint8{0x09}},
   1470 	Optab{AWRMSR, ynone, Pm, [23]uint8{0x30}},
   1471 	Optab{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
   1472 	Optab{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
   1473 	Optab{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
   1474 	Optab{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
   1475 	Optab{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
   1476 	Optab{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
   1477 	Optab{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
   1478 	Optab{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
   1479 	Optab{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
   1480 	Optab{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
   1481 	Optab{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
   1482 	Optab{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
   1483 	Optab{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
   1484 	Optab{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
   1485 	Optab{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
   1486 	Optab{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
   1487 	Optab{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
   1488 	Optab{AAESKEYGENASSIST, yaes2, Pq, [23]uint8{0x3a, 0xdf, 0}},
   1489 	Optab{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
   1490 	Optab{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
   1491 	Optab{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
   1492 	Optab{obj.ATYPE, nil, 0, [23]uint8{}},
   1493 	Optab{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
   1494 	Optab{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
   1495 	Optab{obj.ACHECKNIL, nil, 0, [23]uint8{}},
   1496 	Optab{obj.AVARDEF, nil, 0, [23]uint8{}},
   1497 	Optab{obj.AVARKILL, nil, 0, [23]uint8{}},
   1498 	Optab{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
   1499 	Optab{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
   1500 	Optab{obj.AEND, nil, 0, [23]uint8{}},
   1501 	Optab{0, nil, 0, [23]uint8{}},
   1502 }
   1503 
// opindex maps an opcode, masked with obj.AMask, to its entry in optab.
// The entries are filled in by instinit; a nil entry means no optab row
// was registered for that opcode.
var opindex [(ALAST + 1) & obj.AMask]*Optab
   1505 
   1506 // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
   1507 // This happens on systems like Solaris that call .so functions instead of system calls.
   1508 // It does not seem to be necessary for any other systems. This is probably working
   1509 // around a Solaris-specific bug that should be fixed differently, but we don't know
   1510 // what that bug is. And this does fix it.
   1511 func isextern(s *obj.LSym) bool {
   1512 	// All the Solaris dynamic imports from libc.so begin with "libc_".
   1513 	return strings.HasPrefix(s.Name, "libc_")
   1514 }
   1515 
   1516 // single-instruction no-ops of various lengths.
   1517 // constructed by hand and disassembled with gdb to verify.
   1518 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
   1519 var nop = [][16]uint8{
   1520 	[16]uint8{0x90},
   1521 	[16]uint8{0x66, 0x90},
   1522 	[16]uint8{0x0F, 0x1F, 0x00},
   1523 	[16]uint8{0x0F, 0x1F, 0x40, 0x00},
   1524 	[16]uint8{0x0F, 0x1F, 0x44, 0x00, 0x00},
   1525 	[16]uint8{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
   1526 	[16]uint8{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
   1527 	[16]uint8{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   1528 	[16]uint8{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   1529 }
   1530 
   1531 // Native Client rejects the repeated 0x66 prefix.
   1532 // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   1533 func fillnop(p []byte, n int) {
   1534 	var m int
   1535 
   1536 	for n > 0 {
   1537 		m = n
   1538 		if m > len(nop) {
   1539 			m = len(nop)
   1540 		}
   1541 		copy(p[:m], nop[m-1][:m])
   1542 		p = p[m:]
   1543 		n -= m
   1544 	}
   1545 }
   1546 
   1547 func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
   1548 	obj.Symgrow(ctxt, s, int64(c)+int64(pad))
   1549 	fillnop(s.P[c:], int(pad))
   1550 	return c + pad
   1551 }
   1552 
   1553 func spadjop(ctxt *obj.Link, p *obj.Prog, l int, q int) int {
   1554 	if p.Mode != 64 || ctxt.Arch.Ptrsize == 4 {
   1555 		return l
   1556 	}
   1557 	return q
   1558 }
   1559 
// span6 assembles the instructions of function symbol s into s.P.
//
// It assigns a program counter to every Prog on s.Text, encodes each
// instruction with asmins, and repeats the whole pass until no instruction
// changes size and no short branch has to be widened. On return s.P holds
// the machine code, s.R the relocations recorded while encoding, and
// s.Size the code size rounded up to FuncAlign.
//
// If s.P is already non-nil the function returns without doing anything.
func span6(ctxt *obj.Link, s *obj.LSym) {
	ctxt.Cursym = s

	if s.P != nil {
		return
	}

	// instinit sets ycover[0] (the diagonal entry for class 0), so a zero
	// here means the lookup tables have not been built yet.
	if ycover[0] == 0 {
		instinit()
	}

	// First pass over the instructions: give every branch a target and
	// lower the ADJSP pseudo-instruction.
	var v int32
	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
		if p.To.Type == obj.TYPE_BRANCH {
			// A branch without a resolved target is made to point at itself.
			if p.Pcond == nil {
				p.Pcond = p
			}
		}
		if p.As == AADJSP {
			// ADJSP $n becomes ADD $-n, SP; when -n is negative it becomes
			// SUB $n, SP instead; when n is zero it becomes a NOP.
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			v = int32(-p.From.Offset)
			p.From.Offset = int64(v)
			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
			if v < 0 {
				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
				v = -v
				p.From.Offset = int64(v)
			}

			if v == 0 {
				p.As = obj.ANOP
			}
		}
	}

	// Second pass: initialise the Back flags.
	//   bit 1 (value 2): branch is currently encoded in its short form
	//   bit 0 (value 1): branch jumps backward
	//   bit 2 (value 4): instruction is the target of a backward jump
	var q *obj.Prog
	for p := s.Text; p != nil; p = p.Link {
		p.Back = 2 // use short branches first time through
		q = p.Pcond
		// q.Back already has bit 1 set only if q was visited earlier in
		// this loop, i.e. q precedes p (or is p itself).
		if q != nil && (q.Back&2 != 0) {
			p.Back |= 1 // backward jump
			q.Back |= 4 // loop head
		}

		// NOTE(review): this repeats the ADJSP lowering done in the loop
		// above over the same list; no AADJSP should remain by now, so this
		// looks like dead code — confirm before removing.
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			v = int32(-p.From.Offset)
			p.From.Offset = int64(v)
			p.As = int16(spadjop(ctxt, p, AADDL, AADDQ))
			if v < 0 {
				p.As = int16(spadjop(ctxt, p, ASUBL, ASUBQ))
				v = -v
				p.From.Offset = int64(v)
			}

			if v == 0 {
				p.As = obj.ANOP
			}
		}
	}

	n := 0 // number of assembly passes made so far
	var bp []byte
	var c int32 // current offset within s.P
	var i int
	var loop int32 // count of changes in this pass that force another pass
	var m int      // encoded length of the current instruction
	var p *obj.Prog
	for {
		loop = 0
		// Discard relocations and code from the previous pass, keeping the
		// backing storage of both slices.
		for i = 0; i < len(s.R); i++ {
			s.R[i] = obj.Reloc{}
		}
		s.R = s.R[:0]
		s.P = s.P[:0]
		c = 0
		for p = s.Text; p != nil; p = p.Link {
			// Native Client alignment rules. p.Isize is the size found on
			// the previous pass, so nothing is padded on the first pass.
			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
				// NOTE(review): deferreturn is a fresh local on every
				// iteration, so the lookup below runs for each instruction.
				var deferreturn *obj.LSym

				if deferreturn == nil {
					deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
				}

				// pad everything to avoid crossing 32-byte boundary
				if c>>5 != (c+int32(p.Isize)-1)>>5 {
					c = naclpad(ctxt, s, c, -c&31)
				}

				// pad call deferreturn to start at 32-byte boundary
				// so that subtracting 5 in jmpdefer will jump back
				// to that boundary and rerun the call.
				if p.As == obj.ACALL && p.To.Sym == deferreturn {
					c = naclpad(ctxt, s, c, -c&31)
				}

				// pad call to end at 32-byte boundary
				if p.As == obj.ACALL {
					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
				}

				// the linker treats REP and STOSQ as different instructions
				// but in fact the REP is a prefix on the STOSQ.
				// make sure REP has room for 2 more bytes, so that
				// padding will not be inserted before the next instruction.
				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
					c = naclpad(ctxt, s, c, -c&31)
				}

				// same for LOCK.
				// various instructions follow; the longest is 4 bytes.
				// give ourselves 8 bytes so as to avoid surprises.
				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
					c = naclpad(ctxt, s, c, -c&31)
				}
			}

			// Align targets of backward jumps to LoopAlign, but only when
			// that costs at most MaxLoopPad bytes of padding.
			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
				// pad with NOPs
				v = -c & (LoopAlign - 1)

				if v <= MaxLoopPad {
					obj.Symgrow(ctxt, s, int64(c)+int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			// Each q on the p.Rel list is an earlier branch whose
			// displacement can now be patched into the bytes already in s.P.
			for q = p.Rel; q != nil; q = q.Forwd {
				// Displacement measured from the end of q (q.Mark is the
				// encoded length recorded for q below).
				v = int32(p.Pc - (q.Pc + int64(q.Mark)))
				if q.Back&2 != 0 { // short
					if v > 127 {
						// Does not fit in a signed byte: clear the short
						// flag and request another pass.
						loop++
						q.Back ^= 2
					}

					// The 8-bit displacement sits one byte later for JCXZL
					// than for the other short branches.
					if q.As == AJCXZL {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long form: store v little-endian in the last four
					// bytes of q.
					bp = s.P[q.Pc+int64(q.Mark)-4:]
					bp[0] = byte(v)
					bp = bp[1:]
					bp[0] = byte(v >> 8)
					bp = bp[1:]
					bp[0] = byte(v >> 16)
					bp = bp[1:]
					bp[0] = byte(v >> 24)
				}
			}

			p.Rel = nil

			p.Pc = int64(c)
			asmins(ctxt, p)
			// Bytes produced for p: the distance ctxt.Andptr has advanced
			// through ctxt.And (assumes asmins starts Andptr at the
			// beginning of And — TODO confirm against asmins).
			m = -cap(ctxt.Andptr) + cap(ctxt.And[:])
			if int(p.Isize) != m {
				// The size differs from the previous pass, so offsets may
				// have moved; go around again.
				p.Isize = uint8(m)
				loop++
			}

			obj.Symgrow(ctxt, s, p.Pc+int64(m))
			copy(s.P[p.Pc:][:m], ctxt.And[:m])
			p.Mark = uint16(m)
			c += int32(m)
		}

		// Give up if the layout has not settled after 20 passes.
		n++
		if n > 20 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if loop == 0 {
			break
		}
	}

	// For Native Client, pad the end of the function to a 32-byte boundary.
	if ctxt.Headtype == obj.Hnacl {
		c = naclpad(ctxt, s, c, -c&31)
	}

	// Round the recorded size up to a multiple of FuncAlign. Only s.Size
	// is adjusted here; s.P is not grown.
	c += -c & (FuncAlign - 1)
	s.Size = int64(c)

	// Hex dump of the generated code and relocations; compiled out.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}
}
   1771 
   1772 func instinit() {
   1773 	var c int
   1774 
   1775 	for i := 1; optab[i].as != 0; i++ {
   1776 		c = int(optab[i].as)
   1777 		if opindex[c&obj.AMask] != nil {
   1778 			log.Fatalf("phase error in optab: %d (%v)", i, obj.Aconv(c))
   1779 		}
   1780 		opindex[c&obj.AMask] = &optab[i]
   1781 	}
   1782 
   1783 	for i := 0; i < Ymax; i++ {
   1784 		ycover[i*Ymax+i] = 1
   1785 	}
   1786 
   1787 	ycover[Yi0*Ymax+Yi8] = 1
   1788 	ycover[Yi1*Ymax+Yi8] = 1
   1789 	ycover[Yu7*Ymax+Yi8] = 1
   1790 
   1791 	ycover[Yi0*Ymax+Yu7] = 1
   1792 	ycover[Yi1*Ymax+Yu7] = 1
   1793 
   1794 	ycover[Yi0*Ymax+Yu8] = 1
   1795 	ycover[Yi1*Ymax+Yu8] = 1
   1796 	ycover[Yu7*Ymax+Yu8] = 1
   1797 
   1798 	ycover[Yi0*Ymax+Ys32] = 1
   1799 	ycover[Yi1*Ymax+Ys32] = 1
   1800 	ycover[Yu7*Ymax+Ys32] = 1
   1801 	ycover[Yu8*Ymax+Ys32] = 1
   1802 	ycover[Yi8*Ymax+Ys32] = 1
   1803 
   1804 	ycover[Yi0*Ymax+Yi32] = 1
   1805 	ycover[Yi1*Ymax+Yi32] = 1
   1806 	ycover[Yu7*Ymax+Yi32] = 1
   1807 	ycover[Yu8*Ymax+Yi32] = 1
   1808 	ycover[Yi8*Ymax+Yi32] = 1
   1809 	ycover[Ys32*Ymax+Yi32] = 1
   1810 
   1811 	ycover[Yi0*Ymax+Yi64] = 1
   1812 	ycover[Yi1*Ymax+Yi64] = 1
   1813 	ycover[Yu7*Ymax+Yi64] = 1
   1814 	ycover[Yu8*Ymax+Yi64] = 1
   1815 	ycover[Yi8*Ymax+Yi64] = 1
   1816 	ycover[Ys32*Ymax+Yi64] = 1
   1817 	ycover[Yi32*Ymax+Yi64] = 1
   1818 
   1819 	ycover[Yal*Ymax+Yrb] = 1
   1820 	ycover[Ycl*Ymax+Yrb] = 1
   1821 	ycover[Yax*Ymax+Yrb] = 1
   1822 	ycover[Ycx*Ymax+Yrb] = 1
   1823 	ycover[Yrx*Ymax+Yrb] = 1
   1824 	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
   1825 
   1826 	ycover[Ycl*Ymax+Ycx] = 1
   1827 
   1828 	ycover[Yax*Ymax+Yrx] = 1
   1829 	ycover[Ycx*Ymax+Yrx] = 1
   1830 
   1831 	ycover[Yax*Ymax+Yrl] = 1
   1832 	ycover[Ycx*Ymax+Yrl] = 1
   1833 	ycover[Yrx*Ymax+Yrl] = 1
   1834 	ycover[Yrl32*Ymax+Yrl] = 1
   1835 
   1836 	ycover[Yf0*Ymax+Yrf] = 1
   1837 
   1838 	ycover[Yal*Ymax+Ymb] = 1
   1839 	ycover[Ycl*Ymax+Ymb] = 1
   1840 	ycover[Yax*Ymax+Ymb] = 1
   1841 	ycover[Ycx*Ymax+Ymb] = 1
   1842 	ycover[Yrx*Ymax+Ymb] = 1
   1843 	ycover[Yrb*Ymax+Ymb] = 1
   1844 	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
   1845 	ycover[Ym*Ymax+Ymb] = 1
   1846 
   1847 	ycover[Yax*Ymax+Yml] = 1
   1848 	ycover[Ycx*Ymax+Yml] = 1
   1849 	ycover[Yrx*Ymax+Yml] = 1
   1850 	ycover[Yrl*Ymax+Yml] = 1
   1851 	ycover[Yrl32*Ymax+Yml] = 1
   1852 	ycover[Ym*Ymax+Yml] = 1
   1853 
   1854 	ycover[Yax*Ymax+Ymm] = 1
   1855 	ycover[Ycx*Ymax+Ymm] = 1
   1856 	ycover[Yrx*Ymax+Ymm] = 1
   1857 	ycover[Yrl*Ymax+Ymm] = 1
   1858 	ycover[Yrl32*Ymax+Ymm] = 1
   1859 	ycover[Ym*Ymax+Ymm] = 1
   1860 	ycover[Ymr*Ymax+Ymm] = 1
   1861 
   1862 	ycover[Ym*Ymax+Yxm] = 1
   1863 	ycover[Yxr*Ymax+Yxm] = 1
   1864 
   1865 	for i := 0; i < MAXREG; i++ {
   1866 		reg[i] = -1
   1867 		if i >= REG_AL && i <= REG_R15B {
   1868 			reg[i] = (i - REG_AL) & 7
   1869 			if i >= REG_SPB && i <= REG_DIB {
   1870 				regrex[i] = 0x40
   1871 			}
   1872 			if i >= REG_R8B && i <= REG_R15B {
   1873 				regrex[i] = Rxr | Rxx | Rxb
   1874 			}
   1875 		}
   1876 
   1877 		if i >= REG_AH && i <= REG_BH {
   1878 			reg[i] = 4 + ((i - REG_AH) & 7)
   1879 		}
   1880 		if i >= REG_AX && i <= REG_R15 {
   1881 			reg[i] = (i - REG_AX) & 7
   1882 			if i >= REG_R8 {
   1883 				regrex[i] = Rxr | Rxx | Rxb
   1884 			}
   1885 		}
   1886 
   1887 		if i >= REG_F0 && i <= REG_F0+7 {
   1888 			reg[i] = (i - REG_F0) & 7
   1889 		}
   1890 		if i >= REG_M0 && i <= REG_M0+7 {
   1891 			reg[i] = (i - REG_M0) & 7
   1892 		}
   1893 		if i >= REG_X0 && i <= REG_X0+15 {
   1894 			reg[i] = (i - REG_X0) & 7
   1895 			if i >= REG_X0+8 {
   1896 				regrex[i] = Rxr | Rxx | Rxb
   1897 			}
   1898 		}
   1899 
   1900 		if i >= REG_CR+8 && i <= REG_CR+15 {
   1901 			regrex[i] = Rxr
   1902 		}
   1903 	}
   1904 }
   1905 
   1906 func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
   1907 	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
   1908 		return 0
   1909 	}
   1910 	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
   1911 		switch a.Reg {
   1912 		case REG_CS:
   1913 			return 0x2e
   1914 
   1915 		case REG_DS:
   1916 			return 0x3e
   1917 
   1918 		case REG_ES:
   1919 			return 0x26
   1920 
   1921 		case REG_FS:
   1922 			return 0x64
   1923 
   1924 		case REG_GS:
   1925 			return 0x65
   1926 
   1927 		case REG_TLS:
   1928 			// NOTE: Systems listed here should be only systems that
   1929 			// support direct TLS references like 8(TLS) implemented as
   1930 			// direct references from FS or GS. Systems that require
   1931 			// the initial-exec model, where you load the TLS base into
   1932 			// a register and then index from that register, do not reach
   1933 			// this code and should not be listed.
   1934 			if p.Mode == 32 {
   1935 				switch ctxt.Headtype {
   1936 				default:
   1937 					log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
   1938 
   1939 				case obj.Hdarwin,
   1940 					obj.Hdragonfly,
   1941 					obj.Hfreebsd,
   1942 					obj.Hnetbsd,
   1943 					obj.Hopenbsd:
   1944 					return 0x65 // GS
   1945 				}
   1946 			}
   1947 
   1948 			switch ctxt.Headtype {
   1949 			default:
   1950 				log.Fatalf("unknown TLS base register for %s", obj.Headstr(ctxt.Headtype))
   1951 
   1952 			case obj.Hlinux:
   1953 				if ctxt.Flag_shared != 0 {
   1954 					log.Fatalf("unknown TLS base register for linux with -shared")
   1955 				} else {
   1956 					return 0x64 // FS
   1957 				}
   1958 
   1959 			case obj.Hdragonfly,
   1960 				obj.Hfreebsd,
   1961 				obj.Hnetbsd,
   1962 				obj.Hopenbsd,
   1963 				obj.Hsolaris:
   1964 				return 0x64 // FS
   1965 
   1966 			case obj.Hdarwin:
   1967 				return 0x65 // GS
   1968 			}
   1969 		}
   1970 	}
   1971 
   1972 	if p.Mode == 32 {
   1973 		return 0
   1974 	}
   1975 
   1976 	switch a.Index {
   1977 	case REG_CS:
   1978 		return 0x2e
   1979 
   1980 	case REG_DS:
   1981 		return 0x3e
   1982 
   1983 	case REG_ES:
   1984 		return 0x26
   1985 
   1986 	case REG_TLS:
   1987 		if ctxt.Flag_shared != 0 {
   1988 			// When building for inclusion into a shared library, an instruction of the form
   1989 			//     MOV 0(CX)(TLS*1), AX
   1990 			// becomes
   1991 			//     mov %fs:(%rcx), %rax
   1992 			// which assumes that the correct TLS offset has been loaded into %rcx (today
   1993 			// there is only one TLS variable -- g -- so this is OK). When not building for
   1994 			// a shared library the instruction does not require a prefix.
   1995 			if a.Offset != 0 {
   1996 				log.Fatalf("cannot handle non-0 offsets to TLS")
   1997 			}
   1998 			return 0x64
   1999 		}
   2000 
   2001 	case REG_FS:
   2002 		return 0x64
   2003 
   2004 	case REG_GS:
   2005 		return 0x65
   2006 	}
   2007 
   2008 	return 0
   2009 }
   2010 
   2011 func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
   2012 	switch a.Type {
   2013 	case obj.TYPE_NONE:
   2014 		return Ynone
   2015 
   2016 	case obj.TYPE_BRANCH:
   2017 		return Ybr
   2018 
   2019 	case obj.TYPE_INDIR:
   2020 		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
   2021 			return Yindir
   2022 		}
   2023 		return Yxxx
   2024 
   2025 	case obj.TYPE_MEM:
   2026 		return Ym
   2027 
   2028 	case obj.TYPE_ADDR:
   2029 		switch a.Name {
   2030 		case obj.NAME_EXTERN,
   2031 			obj.NAME_GOTREF,
   2032 			obj.NAME_STATIC:
   2033 			if a.Sym != nil && isextern(a.Sym) || p.Mode == 32 {
   2034 				return Yi32
   2035 			}
   2036 			return Yiauto // use pc-relative addressing
   2037 
   2038 		case obj.NAME_AUTO,
   2039 			obj.NAME_PARAM:
   2040 			return Yiauto
   2041 		}
   2042 
   2043 		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
   2044 		// and got Yi32 in an earlier version of this code.
   2045 		// Keep doing that until we fix yduff etc.
   2046 		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
   2047 			return Yi32
   2048 		}
   2049 
   2050 		if a.Sym != nil || a.Name != obj.NAME_NONE {
   2051 			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
   2052 		}
   2053 		fallthrough
   2054 
   2055 		// fall through
   2056 
   2057 	case obj.TYPE_CONST:
   2058 		if a.Sym != nil {
   2059 			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
   2060 		}
   2061 
   2062 		v := a.Offset
   2063 		if p.Mode == 32 {
   2064 			v = int64(int32(v))
   2065 		}
   2066 		if v == 0 {
   2067 			return Yi0
   2068 		}
   2069 		if v == 1 {
   2070 			return Yi1
   2071 		}
   2072 		if v >= 0 && v <= 127 {
   2073 			return Yu7
   2074 		}
   2075 		if v >= 0 && v <= 255 {
   2076 			return Yu8
   2077 		}
   2078 		if v >= -128 && v <= 127 {
   2079 			return Yi8
   2080 		}
   2081 		if p.Mode == 32 {
   2082 			return Yi32
   2083 		}
   2084 		l := int32(v)
   2085 		if int64(l) == v {
   2086 			return Ys32 /* can sign extend */
   2087 		}
   2088 		if v>>32 == 0 {
   2089 			return Yi32 /* unsigned */
   2090 		}
   2091 		return Yi64
   2092 
   2093 	case obj.TYPE_TEXTSIZE:
   2094 		return Ytextsize
   2095 	}
   2096 
   2097 	if a.Type != obj.TYPE_REG {
   2098 		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
   2099 		return Yxxx
   2100 	}
   2101 
   2102 	switch a.Reg {
   2103 	case REG_AL:
   2104 		return Yal
   2105 
   2106 	case REG_AX:
   2107 		return Yax
   2108 
   2109 		/*
   2110 			case REG_SPB:
   2111 		*/
   2112 	case REG_BPB,
   2113 		REG_SIB,
   2114 		REG_DIB,
   2115 		REG_R8B,
   2116 		REG_R9B,
   2117 		REG_R10B,
   2118 		REG_R11B,
   2119 		REG_R12B,
   2120 		REG_R13B,
   2121 		REG_R14B,
   2122 		REG_R15B:
   2123 		if ctxt.Asmode != 64 {
   2124 			return Yxxx
   2125 		}
   2126 		fallthrough
   2127 
   2128 	case REG_DL,
   2129 		REG_BL,
   2130 		REG_AH,
   2131 		REG_CH,
   2132 		REG_DH,
   2133 		REG_BH:
   2134 		return Yrb
   2135 
   2136 	case REG_CL:
   2137 		return Ycl
   2138 
   2139 	case REG_CX:
   2140 		return Ycx
   2141 
   2142 	case REG_DX, REG_BX:
   2143 		return Yrx
   2144 
   2145 	case REG_R8, /* not really Yrl */
   2146 		REG_R9,
   2147 		REG_R10,
   2148 		REG_R11,
   2149 		REG_R12,
   2150 		REG_R13,
   2151 		REG_R14,
   2152 		REG_R15:
   2153 		if ctxt.Asmode != 64 {
   2154 			return Yxxx
   2155 		}
   2156 		fallthrough
   2157 
   2158 	case REG_SP, REG_BP, REG_SI, REG_DI:
   2159 		if p.Mode == 32 {
   2160 			return Yrl32
   2161 		}
   2162 		return Yrl
   2163 
   2164 	case REG_F0 + 0:
   2165 		return Yf0
   2166 
   2167 	case REG_F0 + 1,
   2168 		REG_F0 + 2,
   2169 		REG_F0 + 3,
   2170 		REG_F0 + 4,
   2171 		REG_F0 + 5,
   2172 		REG_F0 + 6,
   2173 		REG_F0 + 7:
   2174 		return Yrf
   2175 
   2176 	case REG_M0 + 0,
   2177 		REG_M0 + 1,
   2178 		REG_M0 + 2,
   2179 		REG_M0 + 3,
   2180 		REG_M0 + 4,
   2181 		REG_M0 + 5,
   2182 		REG_M0 + 6,
   2183 		REG_M0 + 7:
   2184 		return Ymr
   2185 
   2186 	case REG_X0 + 0,
   2187 		REG_X0 + 1,
   2188 		REG_X0 + 2,
   2189 		REG_X0 + 3,
   2190 		REG_X0 + 4,
   2191 		REG_X0 + 5,
   2192 		REG_X0 + 6,
   2193 		REG_X0 + 7,
   2194 		REG_X0 + 8,
   2195 		REG_X0 + 9,
   2196 		REG_X0 + 10,
   2197 		REG_X0 + 11,
   2198 		REG_X0 + 12,
   2199 		REG_X0 + 13,
   2200 		REG_X0 + 14,
   2201 		REG_X0 + 15:
   2202 		return Yxr
   2203 
   2204 	case REG_CS:
   2205 		return Ycs
   2206 	case REG_SS:
   2207 		return Yss
   2208 	case REG_DS:
   2209 		return Yds
   2210 	case REG_ES:
   2211 		return Yes
   2212 	case REG_FS:
   2213 		return Yfs
   2214 	case REG_GS:
   2215 		return Ygs
   2216 	case REG_TLS:
   2217 		return Ytls
   2218 
   2219 	case REG_GDTR:
   2220 		return Ygdtr
   2221 	case REG_IDTR:
   2222 		return Yidtr
   2223 	case REG_LDTR:
   2224 		return Yldtr
   2225 	case REG_MSW:
   2226 		return Ymsw
   2227 	case REG_TASK:
   2228 		return Ytask
   2229 
   2230 	case REG_CR + 0:
   2231 		return Ycr0
   2232 	case REG_CR + 1:
   2233 		return Ycr1
   2234 	case REG_CR + 2:
   2235 		return Ycr2
   2236 	case REG_CR + 3:
   2237 		return Ycr3
   2238 	case REG_CR + 4:
   2239 		return Ycr4
   2240 	case REG_CR + 5:
   2241 		return Ycr5
   2242 	case REG_CR + 6:
   2243 		return Ycr6
   2244 	case REG_CR + 7:
   2245 		return Ycr7
   2246 	case REG_CR + 8:
   2247 		return Ycr8
   2248 
   2249 	case REG_DR + 0:
   2250 		return Ydr0
   2251 	case REG_DR + 1:
   2252 		return Ydr1
   2253 	case REG_DR + 2:
   2254 		return Ydr2
   2255 	case REG_DR + 3:
   2256 		return Ydr3
   2257 	case REG_DR + 4:
   2258 		return Ydr4
   2259 	case REG_DR + 5:
   2260 		return Ydr5
   2261 	case REG_DR + 6:
   2262 		return Ydr6
   2263 	case REG_DR + 7:
   2264 		return Ydr7
   2265 
   2266 	case REG_TR + 0:
   2267 		return Ytr0
   2268 	case REG_TR + 1:
   2269 		return Ytr1
   2270 	case REG_TR + 2:
   2271 		return Ytr2
   2272 	case REG_TR + 3:
   2273 		return Ytr3
   2274 	case REG_TR + 4:
   2275 		return Ytr4
   2276 	case REG_TR + 5:
   2277 		return Ytr5
   2278 	case REG_TR + 6:
   2279 		return Ytr6
   2280 	case REG_TR + 7:
   2281 		return Ytr7
   2282 	}
   2283 
   2284 	return Yxxx
   2285 }
   2286 
   2287 func asmidx(ctxt *obj.Link, scale int, index int, base int) {
   2288 	var i int
   2289 
   2290 	switch index {
   2291 	default:
   2292 		goto bad
   2293 
   2294 	case REG_NONE:
   2295 		i = 4 << 3
   2296 		goto bas
   2297 
   2298 	case REG_R8,
   2299 		REG_R9,
   2300 		REG_R10,
   2301 		REG_R11,
   2302 		REG_R12,
   2303 		REG_R13,
   2304 		REG_R14,
   2305 		REG_R15:
   2306 		if ctxt.Asmode != 64 {
   2307 			goto bad
   2308 		}
   2309 		fallthrough
   2310 
   2311 	case REG_AX,
   2312 		REG_CX,
   2313 		REG_DX,
   2314 		REG_BX,
   2315 		REG_BP,
   2316 		REG_SI,
   2317 		REG_DI:
   2318 		i = reg[index] << 3
   2319 	}
   2320 
   2321 	switch scale {
   2322 	default:
   2323 		goto bad
   2324 
   2325 	case 1:
   2326 		break
   2327 
   2328 	case 2:
   2329 		i |= 1 << 6
   2330 
   2331 	case 4:
   2332 		i |= 2 << 6
   2333 
   2334 	case 8:
   2335 		i |= 3 << 6
   2336 	}
   2337 
   2338 bas:
   2339 	switch base {
   2340 	default:
   2341 		goto bad
   2342 
   2343 	case REG_NONE: /* must be mod=00 */
   2344 		i |= 5
   2345 
   2346 	case REG_R8,
   2347 		REG_R9,
   2348 		REG_R10,
   2349 		REG_R11,
   2350 		REG_R12,
   2351 		REG_R13,
   2352 		REG_R14,
   2353 		REG_R15:
   2354 		if ctxt.Asmode != 64 {
   2355 			goto bad
   2356 		}
   2357 		fallthrough
   2358 
   2359 	case REG_AX,
   2360 		REG_CX,
   2361 		REG_DX,
   2362 		REG_BX,
   2363 		REG_SP,
   2364 		REG_BP,
   2365 		REG_SI,
   2366 		REG_DI:
   2367 		i |= reg[base]
   2368 	}
   2369 
   2370 	ctxt.Andptr[0] = byte(i)
   2371 	ctxt.Andptr = ctxt.Andptr[1:]
   2372 	return
   2373 
   2374 bad:
   2375 	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
   2376 	ctxt.Andptr[0] = 0
   2377 	ctxt.Andptr = ctxt.Andptr[1:]
   2378 	return
   2379 }
   2380 
   2381 func put4(ctxt *obj.Link, v int32) {
   2382 	ctxt.Andptr[0] = byte(v)
   2383 	ctxt.Andptr[1] = byte(v >> 8)
   2384 	ctxt.Andptr[2] = byte(v >> 16)
   2385 	ctxt.Andptr[3] = byte(v >> 24)
   2386 	ctxt.Andptr = ctxt.Andptr[4:]
   2387 }
   2388 
   2389 func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
   2390 	var rel obj.Reloc
   2391 
   2392 	v := vaddr(ctxt, p, a, &rel)
   2393 	if rel.Siz != 0 {
   2394 		if rel.Siz != 4 {
   2395 			ctxt.Diag("bad reloc")
   2396 		}
   2397 		r := obj.Addrel(ctxt.Cursym)
   2398 		*r = rel
   2399 		r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
   2400 	}
   2401 
   2402 	put4(ctxt, int32(v))
   2403 }
   2404 
   2405 func put8(ctxt *obj.Link, v int64) {
   2406 	ctxt.Andptr[0] = byte(v)
   2407 	ctxt.Andptr[1] = byte(v >> 8)
   2408 	ctxt.Andptr[2] = byte(v >> 16)
   2409 	ctxt.Andptr[3] = byte(v >> 24)
   2410 	ctxt.Andptr[4] = byte(v >> 32)
   2411 	ctxt.Andptr[5] = byte(v >> 40)
   2412 	ctxt.Andptr[6] = byte(v >> 48)
   2413 	ctxt.Andptr[7] = byte(v >> 56)
   2414 	ctxt.Andptr = ctxt.Andptr[8:]
   2415 }
   2416 
   2417 /*
   2418 static void
   2419 relput8(Prog *p, Addr *a)
   2420 {
   2421 	vlong v;
   2422 	Reloc rel, *r;
   2423 
   2424 	v = vaddr(ctxt, p, a, &rel);
   2425 	if(rel.siz != 0) {
   2426 		r = addrel(ctxt->cursym);
   2427 		*r = rel;
   2428 		r->siz = 8;
   2429 		r->off = p->pc + ctxt->andptr - ctxt->and;
   2430 	}
   2431 	put8(ctxt, v);
   2432 }
   2433 */
   2434 func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
   2435 	if r != nil {
   2436 		*r = obj.Reloc{}
   2437 	}
   2438 
   2439 	switch a.Name {
   2440 	case obj.NAME_STATIC,
   2441 		obj.NAME_GOTREF,
   2442 		obj.NAME_EXTERN:
   2443 		s := a.Sym
   2444 		if r == nil {
   2445 			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
   2446 			log.Fatalf("reloc")
   2447 		}
   2448 
   2449 		if a.Name == obj.NAME_GOTREF {
   2450 			r.Siz = 4
   2451 			r.Type = obj.R_GOTPCREL
   2452 		} else if isextern(s) || p.Mode != 64 {
   2453 			r.Siz = 4
   2454 			r.Type = obj.R_ADDR
   2455 		} else {
   2456 			r.Siz = 4
   2457 			r.Type = obj.R_PCREL
   2458 		}
   2459 
   2460 		r.Off = -1 // caller must fill in
   2461 		r.Sym = s
   2462 		r.Add = a.Offset
   2463 
   2464 		return 0
   2465 	}
   2466 
   2467 	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
   2468 		if r == nil {
   2469 			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
   2470 			log.Fatalf("reloc")
   2471 		}
   2472 
   2473 		r.Type = obj.R_TLS_LE
   2474 		r.Siz = 4
   2475 		r.Off = -1 // caller must fill in
   2476 		r.Add = a.Offset
   2477 		return 0
   2478 	}
   2479 
   2480 	return a.Offset
   2481 }
   2482 
// asmandsz encodes operand a as the ModR/M byte of the current instruction,
// followed by a SIB byte and displacement where required, appending the
// bytes at ctxt.Andptr.
//
// r is placed in bits 3-5 of the ModR/M byte. rex is masked down to
// 0x40|Rxr and OR-ed into ctxt.Rexflag together with the REX bits needed by
// the registers used in a. m64 is not referenced in this function.
//
// If the operand needs a relocation it is added to ctxt.Cursym at the offset
// of the 4-byte displacement. Operands that cannot be encoded are reported
// through ctxt.Diag and nothing further is emitted.
func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	v := int32(a.Offset) // displacement, truncated to 32 bits
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// Address operands are never encodable here; the two checks only
		// choose additional diagnostics before failing.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register-direct operand: mod=3, rm=register encoding.
		if a.Reg < REG_AL || REG_X0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		ctxt.Andptr[0] = byte(3<<6 | reg[a.Reg]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: always uses a SIB byte
	// (rm=4).
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// Note: this base shadows the function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			// A symbol combined with an index register is only accepted for
			// isextern symbols or outside 64-bit mode.
			if !isextern(a.Sym) && p.Mode == 64 {
				goto bad
			}
			base = REG_NONE
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 followed by a 32-bit displacement.
			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// Zero displacement: mod=0. BP and R13 are excluded and take one of
		// the displacement forms below instead.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement fits in a signed byte: mod=1.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.Andptr[0] = byte(1<<6 | 4<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), int(a.Index), base)
			ctxt.Andptr[0] = byte(v)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		// Otherwise a 32-bit displacement: mod=2.
		ctxt.Andptr[0] = byte(2<<6 | 4<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without an index register (or with the REG_TLS
	// pseudo-index).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		base = REG_NONE
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	ctxt.Rexflag |= regrex[base]&Rxb | rex
	// No usable base register: the address is just a 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// mod=0, rm=5 without a SIB byte: taken for non-isextern symbol
		// references, and for everything when not in 64-bit mode.
		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			ctxt.Andptr[0] = byte(0<<6 | 5<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			goto putrelv
		}

		/* temporary */
		ctxt.Andptr[0] = byte(0<<6 | 4<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:] /* sib present */
		ctxt.Andptr[0] = 0<<6 | 4<<3 | 5<<0
		ctxt.Andptr = ctxt.Andptr[1:] /* DS:d32 */
		goto putrelv
	}

	// SP and R12 as base are always emitted with a SIB byte (via asmidx
	// with no index register).
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			asmidx(ctxt, int(a.Scale), REG_NONE, base)
			ctxt.Andptr[0] = byte(v)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general register as base, no SIB byte.
	if REG_AX <= base && base <= REG_R15 {
		// offset(reg)(TLS*1) when not building a shared library: the
		// displacement is supplied by an R_TLS_LE relocation whose addend
		// is the original offset.
		if a.Index == REG_TLS && ctxt.Flag_shared == 0 {
			rel = obj.Reloc{}
			rel.Type = obj.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// Zero displacement: mod=0 (not used for BP and R13).
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			ctxt.Andptr[0] = byte(0<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr = ctxt.Andptr[1:]
			return
		}

		// Displacement fits in a signed byte: mod=1.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			ctxt.Andptr[0] = byte(1<<6 | reg[base]<<0 | r<<3)
			ctxt.Andptr[1] = byte(v)
			ctxt.Andptr = ctxt.Andptr[2:]
			return
		}

		// 32-bit displacement: mod=2.
		ctxt.Andptr[0] = byte(2<<6 | reg[base]<<0 | r<<3)
		ctxt.Andptr = ctxt.Andptr[1:]
		goto putrelv
	}

	goto bad

	// putrelv records the pending relocation, if any, at the current
	// position in the instruction and then emits the 4-byte displacement.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		r := obj.Addrel(ctxt.Cursym)
		*r = rel
		// Offset of the displacement: instruction pc plus the number of
		// bytes already emitted for this instruction.
		r.Off = int32(ctxt.Curp.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
	}

	put4(ctxt, v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
   2676 
   2677 func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
   2678 	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
   2679 }
   2680 
   2681 func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
   2682 	asmandsz(ctxt, p, a, o, 0, 0)
   2683 }
   2684 
   2685 func bytereg(a *obj.Addr, t *uint8) {
   2686 	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
   2687 		a.Reg += REG_AL - REG_AX
   2688 		*t = 0
   2689 	}
   2690 }
   2691 
   2692 func unbytereg(a *obj.Addr, t *uint8) {
   2693 	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
   2694 		a.Reg += REG_AX - REG_AL
   2695 		*t = 0
   2696 	}
   2697 }
   2698 
   2699 const (
   2700 	E = 0xff
   2701 )
   2702 
   2703 var ymovtab = []Movtab{
   2704 	/* push */
   2705 	Movtab{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
   2706 	Movtab{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
   2707 	Movtab{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
   2708 	Movtab{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
   2709 	Movtab{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
   2710 	Movtab{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
   2711 	Movtab{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
   2712 	Movtab{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
   2713 	Movtab{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
   2714 	Movtab{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
   2715 	Movtab{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
   2716 	Movtab{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
   2717 	Movtab{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
   2718 	Movtab{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},
   2719 
   2720 	/* pop */
   2721 	Movtab{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
   2722 	Movtab{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
   2723 	Movtab{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
   2724 	Movtab{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
   2725 	Movtab{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
   2726 	Movtab{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
   2727 	Movtab{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
   2728 	Movtab{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
   2729 	Movtab{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
   2730 	Movtab{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
   2731 	Movtab{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
   2732 	Movtab{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},
   2733 
   2734 	/* mov seg */
   2735 	Movtab{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
   2736 	Movtab{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
   2737 	Movtab{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
   2738 	Movtab{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
   2739 	Movtab{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
   2740 	Movtab{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
   2741 	Movtab{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
   2742 	Movtab{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
   2743 	Movtab{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
   2744 	Movtab{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
   2745 	Movtab{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
   2746 	Movtab{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},
   2747 
   2748 	/* mov cr */
   2749 	Movtab{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
   2750 	Movtab{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
   2751 	Movtab{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
   2752 	Movtab{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
   2753 	Movtab{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
   2754 	Movtab{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
   2755 	Movtab{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
   2756 	Movtab{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
   2757 	Movtab{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
   2758 	Movtab{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
   2759 	Movtab{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
   2760 	Movtab{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
   2761 	Movtab{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
   2762 	Movtab{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
   2763 	Movtab{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
   2764 	Movtab{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
   2765 	Movtab{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
   2766 	Movtab{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
   2767 	Movtab{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
   2768 	Movtab{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
   2769 
   2770 	/* mov dr */
   2771 	Movtab{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
   2772 	Movtab{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
   2773 	Movtab{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
   2774 	Movtab{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
   2775 	Movtab{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
   2776 	Movtab{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
   2777 	Movtab{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
   2778 	Movtab{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
   2779 	Movtab{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
   2780 	Movtab{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
   2781 	Movtab{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
   2782 	Movtab{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
   2783 
   2784 	/* mov tr */
   2785 	Movtab{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
   2786 	Movtab{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
   2787 	Movtab{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
   2788 	Movtab{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},
   2789 
   2790 	/* lgdt, sgdt, lidt, sidt */
   2791 	Movtab{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
   2792 	Movtab{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
   2793 	Movtab{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
   2794 	Movtab{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
   2795 	Movtab{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
   2796 	Movtab{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
   2797 	Movtab{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
   2798 	Movtab{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
   2799 
   2800 	/* lldt, sldt */
   2801 	Movtab{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
   2802 	Movtab{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},
   2803 
   2804 	/* lmsw, smsw */
   2805 	Movtab{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
   2806 	Movtab{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},
   2807 
   2808 	/* ltr, str */
   2809 	Movtab{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
   2810 	Movtab{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},
   2811 
   2812 	/* load full pointer - unsupported
   2813 	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
   2814 	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
   2815 	*/
   2816 
   2817 	/* double shift */
   2818 	Movtab{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
   2819 	Movtab{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
   2820 	Movtab{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
   2821 	Movtab{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
   2822 	Movtab{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
   2823 	Movtab{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
   2824 	Movtab{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
   2825 	Movtab{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
   2826 	Movtab{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
   2827 	Movtab{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
   2828 	Movtab{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
   2829 	Movtab{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
   2830 	Movtab{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
   2831 	Movtab{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
   2832 	Movtab{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
   2833 	Movtab{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
   2834 	Movtab{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
   2835 	Movtab{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
   2836 
   2837 	/* load TLS base */
   2838 	Movtab{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
   2839 	Movtab{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
   2840 	Movtab{0, 0, 0, 0, 0, [4]uint8{}},
   2841 }
   2842 
   2843 func isax(a *obj.Addr) bool {
   2844 	switch a.Reg {
   2845 	case REG_AX, REG_AL, REG_AH:
   2846 		return true
   2847 	}
   2848 
   2849 	if a.Index == REG_AX {
   2850 		return true
   2851 	}
   2852 	return false
   2853 }
   2854 
   2855 func subreg(p *obj.Prog, from int, to int) {
   2856 	if false { /* debug['Q'] */
   2857 		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
   2858 	}
   2859 
   2860 	if int(p.From.Reg) == from {
   2861 		p.From.Reg = int16(to)
   2862 		p.Ft = 0
   2863 	}
   2864 
   2865 	if int(p.To.Reg) == from {
   2866 		p.To.Reg = int16(to)
   2867 		p.Tt = 0
   2868 	}
   2869 
   2870 	if int(p.From.Index) == from {
   2871 		p.From.Index = int16(to)
   2872 		p.Ft = 0
   2873 	}
   2874 
   2875 	if int(p.To.Index) == from {
   2876 		p.To.Index = int16(to)
   2877 		p.Tt = 0
   2878 	}
   2879 
   2880 	if false { /* debug['Q'] */
   2881 		fmt.Printf("%v\n", p)
   2882 	}
   2883 }
   2884 
   2885 func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
   2886 	switch op {
   2887 	case Pm, Pe, Pf2, Pf3:
   2888 		if osize != 1 {
   2889 			if op != Pm {
   2890 				ctxt.Andptr[0] = byte(op)
   2891 				ctxt.Andptr = ctxt.Andptr[1:]
   2892 			}
   2893 			ctxt.Andptr[0] = Pm
   2894 			ctxt.Andptr = ctxt.Andptr[1:]
   2895 			z++
   2896 			op = int(o.op[z])
   2897 			break
   2898 		}
   2899 		fallthrough
   2900 
   2901 	default:
   2902 		if -cap(ctxt.Andptr) == -cap(ctxt.And) || ctxt.And[-cap(ctxt.Andptr)+cap(ctxt.And[:])-1] != Pm {
   2903 			ctxt.Andptr[0] = Pm
   2904 			ctxt.Andptr = ctxt.Andptr[1:]
   2905 		}
   2906 	}
   2907 
   2908 	ctxt.Andptr[0] = byte(op)
   2909 	ctxt.Andptr = ctxt.Andptr[1:]
   2910 	return z
   2911 }
   2912 
   2913 var bpduff1 = []byte{
   2914 	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
   2915 	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
   2916 }
   2917 
   2918 var bpduff2 = []byte{
   2919 	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
   2920 }
   2921 
   2922 func doasm(ctxt *obj.Link, p *obj.Prog) {
   2923 	ctxt.Curp = p // TODO
   2924 
   2925 	o := opindex[p.As&obj.AMask]
   2926 
   2927 	if o == nil {
   2928 		ctxt.Diag("asmins: missing op %v", p)
   2929 		return
   2930 	}
   2931 
   2932 	pre := prefixof(ctxt, p, &p.From)
   2933 	if pre != 0 {
   2934 		ctxt.Andptr[0] = byte(pre)
   2935 		ctxt.Andptr = ctxt.Andptr[1:]
   2936 	}
   2937 	pre = prefixof(ctxt, p, &p.To)
   2938 	if pre != 0 {
   2939 		ctxt.Andptr[0] = byte(pre)
   2940 		ctxt.Andptr = ctxt.Andptr[1:]
   2941 	}
   2942 
   2943 	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
   2944 	// which encodes as SHRQ $32(DX*0), AX.
   2945 	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
   2946 	// Change encoding generated by assemblers and compilers and remove.
   2947 	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
   2948 		p.From3 = new(obj.Addr)
   2949 		p.From3.Type = obj.TYPE_REG
   2950 		p.From3.Reg = p.From.Index
   2951 		p.From.Index = 0
   2952 	}
   2953 
   2954 	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
   2955 	// Change encoding generated by assemblers and compilers (if any) and remove.
   2956 	switch p.As {
   2957 	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
   2958 		if p.From3Type() == obj.TYPE_NONE {
   2959 			p.From3 = new(obj.Addr)
   2960 			*p.From3 = p.From
   2961 			p.From = obj.Addr{}
   2962 			p.From.Type = obj.TYPE_CONST
   2963 			p.From.Offset = p.To.Offset
   2964 			p.To.Offset = 0
   2965 		}
   2966 	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
   2967 		if p.From3Type() == obj.TYPE_NONE {
   2968 			p.From3 = new(obj.Addr)
   2969 			*p.From3 = p.To
   2970 			p.To = obj.Addr{}
   2971 			p.To.Type = obj.TYPE_CONST
   2972 			p.To.Offset = p.From3.Offset
   2973 			p.From3.Offset = 0
   2974 		}
   2975 	}
   2976 
   2977 	if p.Ft == 0 {
   2978 		p.Ft = uint8(oclass(ctxt, p, &p.From))
   2979 	}
   2980 	if p.Tt == 0 {
   2981 		p.Tt = uint8(oclass(ctxt, p, &p.To))
   2982 	}
   2983 
   2984 	ft := int(p.Ft) * Ymax
   2985 	f3t := Ynone * Ymax
   2986 	if p.From3 != nil {
   2987 		f3t = oclass(ctxt, p, p.From3) * Ymax
   2988 	}
   2989 	tt := int(p.Tt) * Ymax
   2990 
   2991 	xo := obj.Bool2int(o.op[0] == 0x0f)
   2992 	z := 0
   2993 	var a *obj.Addr
   2994 	var l int
   2995 	var op int
   2996 	var q *obj.Prog
   2997 	var r *obj.Reloc
   2998 	var rel obj.Reloc
   2999 	var v int64
   3000 	for i := range o.ytab {
   3001 		yt := &o.ytab[i]
   3002 		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
   3003 			switch o.prefix {
   3004 			case Px1: /* first option valid only in 32-bit mode */
   3005 				if ctxt.Mode == 64 && z == 0 {
   3006 					z += int(yt.zoffset) + xo
   3007 					continue
   3008 				}
   3009 			case Pq: /* 16 bit escape and opcode escape */
   3010 				ctxt.Andptr[0] = Pe
   3011 				ctxt.Andptr = ctxt.Andptr[1:]
   3012 
   3013 				ctxt.Andptr[0] = Pm
   3014 				ctxt.Andptr = ctxt.Andptr[1:]
   3015 
   3016 			case Pq3: /* 16 bit escape, Rex.w, and opcode escape */
   3017 				ctxt.Andptr[0] = Pe
   3018 				ctxt.Andptr = ctxt.Andptr[1:]
   3019 
   3020 				ctxt.Andptr[0] = Pw
   3021 				ctxt.Andptr = ctxt.Andptr[1:]
   3022 				ctxt.Andptr[0] = Pm
   3023 				ctxt.Andptr = ctxt.Andptr[1:]
   3024 
   3025 			case Pf2, /* xmm opcode escape */
   3026 				Pf3:
   3027 				ctxt.Andptr[0] = byte(o.prefix)
   3028 				ctxt.Andptr = ctxt.Andptr[1:]
   3029 
   3030 				ctxt.Andptr[0] = Pm
   3031 				ctxt.Andptr = ctxt.Andptr[1:]
   3032 
   3033 			case Pm: /* opcode escape */
   3034 				ctxt.Andptr[0] = Pm
   3035 				ctxt.Andptr = ctxt.Andptr[1:]
   3036 
   3037 			case Pe: /* 16 bit escape */
   3038 				ctxt.Andptr[0] = Pe
   3039 				ctxt.Andptr = ctxt.Andptr[1:]
   3040 
   3041 			case Pw: /* 64-bit escape */
   3042 				if p.Mode != 64 {
   3043 					ctxt.Diag("asmins: illegal 64: %v", p)
   3044 				}
   3045 				ctxt.Rexflag |= Pw
   3046 
   3047 			case Pw8: /* 64-bit escape if z >= 8 */
   3048 				if z >= 8 {
   3049 					if p.Mode != 64 {
   3050 						ctxt.Diag("asmins: illegal 64: %v", p)
   3051 					}
   3052 					ctxt.Rexflag |= Pw
   3053 				}
   3054 
   3055 			case Pb: /* botch */
   3056 				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
   3057 					goto bad
   3058 				}
   3059 				// NOTE(rsc): This is probably safe to do always,
   3060 				// but when enabled it chooses different encodings
   3061 				// than the old cmd/internal/obj/i386 code did,
   3062 				// which breaks our "same bits out" checks.
   3063 				// In particular, CMPB AX, $0 encodes as 80 f8 00
   3064 				// in the original obj/i386, and it would encode
   3065 				// (using a valid, shorter form) as 3c 00 if we enabled
   3066 				// the call to bytereg here.
   3067 				if p.Mode == 64 {
   3068 					bytereg(&p.From, &p.Ft)
   3069 					bytereg(&p.To, &p.Tt)
   3070 				}
   3071 
   3072 			case P32: /* 32 bit but illegal if 64-bit mode */
   3073 				if p.Mode == 64 {
   3074 					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
   3075 				}
   3076 
   3077 			case Py: /* 64-bit only, no prefix */
   3078 				if p.Mode != 64 {
   3079 					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
   3080 				}
   3081 
   3082 			case Py1: /* 64-bit only if z < 1, no prefix */
   3083 				if z < 1 && p.Mode != 64 {
   3084 					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
   3085 				}
   3086 
   3087 			case Py3: /* 64-bit only if z < 3, no prefix */
   3088 				if z < 3 && p.Mode != 64 {
   3089 					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
   3090 				}
   3091 			}
   3092 
   3093 			if z >= len(o.op) {
   3094 				log.Fatalf("asmins bad table %v", p)
   3095 			}
   3096 			op = int(o.op[z])
   3097 			if op == 0x0f {
   3098 				ctxt.Andptr[0] = byte(op)
   3099 				ctxt.Andptr = ctxt.Andptr[1:]
   3100 				z++
   3101 				op = int(o.op[z])
   3102 			}
   3103 
   3104 			switch yt.zcase {
   3105 			default:
   3106 				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
   3107 				return
   3108 
   3109 			case Zpseudo:
   3110 				break
   3111 
   3112 			case Zlit:
   3113 				for ; ; z++ {
   3114 					op = int(o.op[z])
   3115 					if op == 0 {
   3116 						break
   3117 					}
   3118 					ctxt.Andptr[0] = byte(op)
   3119 					ctxt.Andptr = ctxt.Andptr[1:]
   3120 				}
   3121 
   3122 			case Zlitm_r:
   3123 				for ; ; z++ {
   3124 					op = int(o.op[z])
   3125 					if op == 0 {
   3126 						break
   3127 					}
   3128 					ctxt.Andptr[0] = byte(op)
   3129 					ctxt.Andptr = ctxt.Andptr[1:]
   3130 				}
   3131 				asmand(ctxt, p, &p.From, &p.To)
   3132 
   3133 			case Zmb_r:
   3134 				bytereg(&p.From, &p.Ft)
   3135 				fallthrough
   3136 
   3137 				/* fall through */
   3138 			case Zm_r:
   3139 				ctxt.Andptr[0] = byte(op)
   3140 				ctxt.Andptr = ctxt.Andptr[1:]
   3141 
   3142 				asmand(ctxt, p, &p.From, &p.To)
   3143 
   3144 			case Zm2_r:
   3145 				ctxt.Andptr[0] = byte(op)
   3146 				ctxt.Andptr = ctxt.Andptr[1:]
   3147 				ctxt.Andptr[0] = byte(o.op[z+1])
   3148 				ctxt.Andptr = ctxt.Andptr[1:]
   3149 				asmand(ctxt, p, &p.From, &p.To)
   3150 
   3151 			case Zm_r_xm:
   3152 				mediaop(ctxt, o, op, int(yt.zoffset), z)
   3153 				asmand(ctxt, p, &p.From, &p.To)
   3154 
   3155 			case Zm_r_xm_nr:
   3156 				ctxt.Rexflag = 0
   3157 				mediaop(ctxt, o, op, int(yt.zoffset), z)
   3158 				asmand(ctxt, p, &p.From, &p.To)
   3159 
   3160 			case Zm_r_i_xm:
   3161 				mediaop(ctxt, o, op, int(yt.zoffset), z)
   3162 				asmand(ctxt, p, &p.From, p.From3)
   3163 				ctxt.Andptr[0] = byte(p.To.Offset)
   3164 				ctxt.Andptr = ctxt.Andptr[1:]
   3165 
   3166 			case Zm_r_3d:
   3167 				ctxt.Andptr[0] = 0x0f
   3168 				ctxt.Andptr = ctxt.Andptr[1:]
   3169 				ctxt.Andptr[0] = 0x0f
   3170 				ctxt.Andptr = ctxt.Andptr[1:]
   3171 				asmand(ctxt, p, &p.From, &p.To)
   3172 				ctxt.Andptr[0] = byte(op)
   3173 				ctxt.Andptr = ctxt.Andptr[1:]
   3174 
   3175 			case Zibm_r:
   3176 				for {
   3177 					tmp1 := z
   3178 					z++
   3179 					op = int(o.op[tmp1])
   3180 					if op == 0 {
   3181 						break
   3182 					}
   3183 					ctxt.Andptr[0] = byte(op)
   3184 					ctxt.Andptr = ctxt.Andptr[1:]
   3185 				}
   3186 				asmand(ctxt, p, p.From3, &p.To)
   3187 				ctxt.Andptr[0] = byte(p.From.Offset)
   3188 				ctxt.Andptr = ctxt.Andptr[1:]
   3189 
   3190 			case Zaut_r:
   3191 				ctxt.Andptr[0] = 0x8d
   3192 				ctxt.Andptr = ctxt.Andptr[1:] /* leal */
   3193 				if p.From.Type != obj.TYPE_ADDR {
   3194 					ctxt.Diag("asmins: Zaut sb type ADDR")
   3195 				}
   3196 				p.From.Type = obj.TYPE_MEM
   3197 				asmand(ctxt, p, &p.From, &p.To)
   3198 				p.From.Type = obj.TYPE_ADDR
   3199 
   3200 			case Zm_o:
   3201 				ctxt.Andptr[0] = byte(op)
   3202 				ctxt.Andptr = ctxt.Andptr[1:]
   3203 				asmando(ctxt, p, &p.From, int(o.op[z+1]))
   3204 
   3205 			case Zr_m:
   3206 				ctxt.Andptr[0] = byte(op)
   3207 				ctxt.Andptr = ctxt.Andptr[1:]
   3208 				asmand(ctxt, p, &p.To, &p.From)
   3209 
   3210 			case Zr_m_xm:
   3211 				mediaop(ctxt, o, op, int(yt.zoffset), z)
   3212 				asmand(ctxt, p, &p.To, &p.From)
   3213 
   3214 			case Zr_m_xm_nr:
   3215 				ctxt.Rexflag = 0
   3216 				mediaop(ctxt, o, op, int(yt.zoffset), z)
   3217 				asmand(ctxt, p, &p.To, &p.From)
   3218 
   3219 			case Zo_m:
   3220 				ctxt.Andptr[0] = byte(op)
   3221 				ctxt.Andptr = ctxt.Andptr[1:]
   3222 				asmando(ctxt, p, &p.To, int(o.op[z+1]))
   3223 
   3224 			case Zcallindreg:
   3225 				r = obj.Addrel(ctxt.Cursym)
   3226 				r.Off = int32(p.Pc)
   3227 				r.Type = obj.R_CALLIND
   3228 				r.Siz = 0
   3229 				fallthrough
   3230 
   3231 			case Zo_m64:
   3232 				ctxt.Andptr[0] = byte(op)
   3233 				ctxt.Andptr = ctxt.Andptr[1:]
   3234 				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
   3235 
   3236 			case Zm_ibo:
   3237 				ctxt.Andptr[0] = byte(op)
   3238 				ctxt.Andptr = ctxt.Andptr[1:]
   3239 				asmando(ctxt, p, &p.From, int(o.op[z+1]))
   3240 				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.To, nil))
   3241 				ctxt.Andptr = ctxt.Andptr[1:]
   3242 
   3243 			case Zibo_m:
   3244 				ctxt.Andptr[0] = byte(op)
   3245 				ctxt.Andptr = ctxt.Andptr[1:]
   3246 				asmando(ctxt, p, &p.To, int(o.op[z+1]))
   3247 				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
   3248 				ctxt.Andptr = ctxt.Andptr[1:]
   3249 
   3250 			case Zibo_m_xm:
   3251 				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
   3252 				asmando(ctxt, p, &p.To, int(o.op[z+1]))
   3253 				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
   3254 				ctxt.Andptr = ctxt.Andptr[1:]
   3255 
   3256 			case Z_ib, Zib_:
   3257 				if yt.zcase == Zib_ {
   3258 					a = &p.From
   3259 				} else {
   3260 					a = &p.To
   3261 				}
   3262 				ctxt.Andptr[0] = byte(op)
   3263 				ctxt.Andptr = ctxt.Andptr[1:]
   3264 				ctxt.Andptr[0] = byte(vaddr(ctxt, p, a, nil))
   3265 				ctxt.Andptr = ctxt.Andptr[1:]
   3266 
   3267 			case Zib_rp:
   3268 				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
   3269 				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
   3270 				ctxt.Andptr = ctxt.Andptr[1:]
   3271 				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
   3272 				ctxt.Andptr = ctxt.Andptr[1:]
   3273 
   3274 			case Zil_rp:
   3275 				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
   3276 				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
   3277 				ctxt.Andptr = ctxt.Andptr[1:]
   3278 				if o.prefix == Pe {
   3279 					v = vaddr(ctxt, p, &p.From, nil)
   3280 					ctxt.Andptr[0] = byte(v)
   3281 					ctxt.Andptr = ctxt.Andptr[1:]
   3282 					ctxt.Andptr[0] = byte(v >> 8)
   3283 					ctxt.Andptr = ctxt.Andptr[1:]
   3284 				} else {
   3285 					relput4(ctxt, p, &p.From)
   3286 				}
   3287 
   3288 			case Zo_iw:
   3289 				ctxt.Andptr[0] = byte(op)
   3290 				ctxt.Andptr = ctxt.Andptr[1:]
   3291 				if p.From.Type != obj.TYPE_NONE {
   3292 					v = vaddr(ctxt, p, &p.From, nil)
   3293 					ctxt.Andptr[0] = byte(v)
   3294 					ctxt.Andptr = ctxt.Andptr[1:]
   3295 					ctxt.Andptr[0] = byte(v >> 8)
   3296 					ctxt.Andptr = ctxt.Andptr[1:]
   3297 				}
   3298 
   3299 			case Ziq_rp:
   3300 				v = vaddr(ctxt, p, &p.From, &rel)
   3301 				l = int(v >> 32)
   3302 				if l == 0 && rel.Siz != 8 {
   3303 					//p->mark |= 0100;
   3304 					//print("zero: %llux %v\n", v, p);
   3305 					ctxt.Rexflag &^= (0x40 | Rxw)
   3306 
   3307 					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
   3308 					ctxt.Andptr[0] = byte(0xb8 + reg[p.To.Reg])
   3309 					ctxt.Andptr = ctxt.Andptr[1:]
   3310 					if rel.Type != 0 {
   3311 						r = obj.Addrel(ctxt.Cursym)
   3312 						*r = rel
   3313 						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
   3314 					}
   3315 
   3316 					put4(ctxt, int32(v))
   3317 				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
   3318 
   3319 					//p->mark |= 0100;
   3320 					//print("sign: %llux %v\n", v, p);
   3321 					ctxt.Andptr[0] = 0xc7
   3322 					ctxt.Andptr = ctxt.Andptr[1:]
   3323 
   3324 					asmando(ctxt, p, &p.To, 0)
   3325 					put4(ctxt, int32(v)) /* need all 8 */
   3326 				} else {
   3327 					//print("all: %llux %v\n", v, p);
   3328 					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
   3329 
   3330 					ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
   3331 					ctxt.Andptr = ctxt.Andptr[1:]
   3332 					if rel.Type != 0 {
   3333 						r = obj.Addrel(ctxt.Cursym)
   3334 						*r = rel
   3335 						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
   3336 					}
   3337 
   3338 					put8(ctxt, v)
   3339 				}
   3340 
   3341 			case Zib_rr:
   3342 				ctxt.Andptr[0] = byte(op)
   3343 				ctxt.Andptr = ctxt.Andptr[1:]
   3344 				asmand(ctxt, p, &p.To, &p.To)
   3345 				ctxt.Andptr[0] = byte(vaddr(ctxt, p, &p.From, nil))
   3346 				ctxt.Andptr = ctxt.Andptr[1:]
   3347 
   3348 			case Z_il, Zil_:
   3349 				if yt.zcase == Zil_ {
   3350 					a = &p.From
   3351 				} else {
   3352 					a = &p.To
   3353 				}
   3354 				ctxt.Andptr[0] = byte(op)
   3355 				ctxt.Andptr = ctxt.Andptr[1:]
   3356 				if o.prefix == Pe {
   3357 					v = vaddr(ctxt, p, a, nil)
   3358 					ctxt.Andptr[0] = byte(v)
   3359 					ctxt.Andptr = ctxt.Andptr[1:]
   3360 					ctxt.Andptr[0] = byte(v >> 8)
   3361 					ctxt.Andptr = ctxt.Andptr[1:]
   3362 				} else {
   3363 					relput4(ctxt, p, a)
   3364 				}
   3365 
   3366 			case Zm_ilo, Zilo_m:
   3367 				ctxt.Andptr[0] = byte(op)
   3368 				ctxt.Andptr = ctxt.Andptr[1:]
   3369 				if yt.zcase == Zilo_m {
   3370 					a = &p.From
   3371 					asmando(ctxt, p, &p.To, int(o.op[z+1]))
   3372 				} else {
   3373 					a = &p.To
   3374 					asmando(ctxt, p, &p.From, int(o.op[z+1]))
   3375 				}
   3376 
   3377 				if o.prefix == Pe {
   3378 					v = vaddr(ctxt, p, a, nil)
   3379 					ctxt.Andptr[0] = byte(v)
   3380 					ctxt.Andptr = ctxt.Andptr[1:]
   3381 					ctxt.Andptr[0] = byte(v >> 8)
   3382 					ctxt.Andptr = ctxt.Andptr[1:]
   3383 				} else {
   3384 					relput4(ctxt, p, a)
   3385 				}
   3386 
   3387 			case Zil_rr:
   3388 				ctxt.Andptr[0] = byte(op)
   3389 				ctxt.Andptr = ctxt.Andptr[1:]
   3390 				asmand(ctxt, p, &p.To, &p.To)
   3391 				if o.prefix == Pe {
   3392 					v = vaddr(ctxt, p, &p.From, nil)
   3393 					ctxt.Andptr[0] = byte(v)
   3394 					ctxt.Andptr = ctxt.Andptr[1:]
   3395 					ctxt.Andptr[0] = byte(v >> 8)
   3396 					ctxt.Andptr = ctxt.Andptr[1:]
   3397 				} else {
   3398 					relput4(ctxt, p, &p.From)
   3399 				}
   3400 
   3401 			case Z_rp:
   3402 				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
   3403 				ctxt.Andptr[0] = byte(op + reg[p.To.Reg])
   3404 				ctxt.Andptr = ctxt.Andptr[1:]
   3405 
   3406 			case Zrp_:
   3407 				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
   3408 				ctxt.Andptr[0] = byte(op + reg[p.From.Reg])
   3409 				ctxt.Andptr = ctxt.Andptr[1:]
   3410 
   3411 			case Zclr:
   3412 				ctxt.Rexflag &^= Pw
   3413 				ctxt.Andptr[0] = byte(op)
   3414 				ctxt.Andptr = ctxt.Andptr[1:]
   3415 				asmand(ctxt, p, &p.To, &p.To)
   3416 
   3417 			case Zcallcon, Zjmpcon:
   3418 				if yt.zcase == Zcallcon {
   3419 					ctxt.Andptr[0] = byte(op)
   3420 					ctxt.Andptr = ctxt.Andptr[1:]
   3421 				} else {
   3422 					ctxt.Andptr[0] = byte(o.op[z+1])
   3423 					ctxt.Andptr = ctxt.Andptr[1:]
   3424 				}
   3425 				r = obj.Addrel(ctxt.Cursym)
   3426 				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
   3427 				r.Type = obj.R_PCREL
   3428 				r.Siz = 4
   3429 				r.Add = p.To.Offset
   3430 				put4(ctxt, 0)
   3431 
   3432 			case Zcallind:
   3433 				ctxt.Andptr[0] = byte(op)
   3434 				ctxt.Andptr = ctxt.Andptr[1:]
   3435 				ctxt.Andptr[0] = byte(o.op[z+1])
   3436 				ctxt.Andptr = ctxt.Andptr[1:]
   3437 				r = obj.Addrel(ctxt.Cursym)
   3438 				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
   3439 				r.Type = obj.R_ADDR
   3440 				r.Siz = 4
   3441 				r.Add = p.To.Offset
   3442 				r.Sym = p.To.Sym
   3443 				put4(ctxt, 0)
   3444 
   3445 			case Zcall, Zcallduff:
   3446 				if p.To.Sym == nil {
   3447 					ctxt.Diag("call without target")
   3448 					log.Fatalf("bad code")
   3449 				}
   3450 
   3451 				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
   3452 					ctxt.Diag("directly calling duff when dynamically linking Go")
   3453 				}
   3454 
   3455 				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
   3456 					// Maintain BP around call, since duffcopy/duffzero can't do it
   3457 					// (the call jumps into the middle of the function).
   3458 					// This makes it possible to see call sites for duffcopy/duffzero in
   3459 					// BP-based profiling tools like Linux perf (which is the
   3460 					// whole point of obj.Framepointer_enabled).
   3461 					// MOVQ BP, -16(SP)
   3462 					// LEAQ -16(SP), BP
   3463 					copy(ctxt.Andptr, bpduff1)
   3464 					ctxt.Andptr = ctxt.Andptr[len(bpduff1):]
   3465 				}
   3466 				ctxt.Andptr[0] = byte(op)
   3467 				ctxt.Andptr = ctxt.Andptr[1:]
   3468 				r = obj.Addrel(ctxt.Cursym)
   3469 				r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
   3470 				r.Sym = p.To.Sym
   3471 				r.Add = p.To.Offset
   3472 				r.Type = obj.R_CALL
   3473 				r.Siz = 4
   3474 				put4(ctxt, 0)
   3475 
   3476 				if obj.Framepointer_enabled != 0 && yt.zcase == Zcallduff && p.Mode == 64 {
   3477 					// Pop BP pushed above.
   3478 					// MOVQ 0(BP), BP
   3479 					copy(ctxt.Andptr, bpduff2)
   3480 					ctxt.Andptr = ctxt.Andptr[len(bpduff2):]
   3481 				}
   3482 
   3483 			// TODO: jump across functions needs reloc
   3484 			case Zbr, Zjmp, Zloop:
   3485 				if p.To.Sym != nil {
   3486 					if yt.zcase != Zjmp {
   3487 						ctxt.Diag("branch to ATEXT")
   3488 						log.Fatalf("bad code")
   3489 					}
   3490 
   3491 					ctxt.Andptr[0] = byte(o.op[z+1])
   3492 					ctxt.Andptr = ctxt.Andptr[1:]
   3493 					r = obj.Addrel(ctxt.Cursym)
   3494 					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
   3495 					r.Sym = p.To.Sym
   3496 					r.Type = obj.R_PCREL
   3497 					r.Siz = 4
   3498 					put4(ctxt, 0)
   3499 					break
   3500 				}
   3501 
   3502 				// Assumes q is in this function.
   3503 				// TODO: Check in input, preserve in brchain.
   3504 
   3505 				// Fill in backward jump now.
   3506 				q = p.Pcond
   3507 
   3508 				if q == nil {
   3509 					ctxt.Diag("jmp/branch/loop without target")
   3510 					log.Fatalf("bad code")
   3511 				}
   3512 
   3513 				if p.Back&1 != 0 {
   3514 					v = q.Pc - (p.Pc + 2)
   3515 					if v >= -128 {
   3516 						if p.As == AJCXZL {
   3517 							ctxt.Andptr[0] = 0x67
   3518 							ctxt.Andptr = ctxt.Andptr[1:]
   3519 						}
   3520 						ctxt.Andptr[0] = byte(op)
   3521 						ctxt.Andptr = ctxt.Andptr[1:]
   3522 						ctxt.Andptr[0] = byte(v)
   3523 						ctxt.Andptr = ctxt.Andptr[1:]
   3524 					} else if yt.zcase == Zloop {
   3525 						ctxt.Diag("loop too far: %v", p)
   3526 					} else {
   3527 						v -= 5 - 2
   3528 						if yt.zcase == Zbr {
   3529 							ctxt.Andptr[0] = 0x0f
   3530 							ctxt.Andptr = ctxt.Andptr[1:]
   3531 							v--
   3532 						}
   3533 
   3534 						ctxt.Andptr[0] = byte(o.op[z+1])
   3535 						ctxt.Andptr = ctxt.Andptr[1:]
   3536 						ctxt.Andptr[0] = byte(v)
   3537 						ctxt.Andptr = ctxt.Andptr[1:]
   3538 						ctxt.Andptr[0] = byte(v >> 8)
   3539 						ctxt.Andptr = ctxt.Andptr[1:]
   3540 						ctxt.Andptr[0] = byte(v >> 16)
   3541 						ctxt.Andptr = ctxt.Andptr[1:]
   3542 						ctxt.Andptr[0] = byte(v >> 24)
   3543 						ctxt.Andptr = ctxt.Andptr[1:]
   3544 					}
   3545 
   3546 					break
   3547 				}
   3548 
   3549 				// Annotate target; will fill in later.
   3550 				p.Forwd = q.Rel
   3551 
   3552 				q.Rel = p
   3553 				if p.Back&2 != 0 { // short
   3554 					if p.As == AJCXZL {
   3555 						ctxt.Andptr[0] = 0x67
   3556 						ctxt.Andptr = ctxt.Andptr[1:]
   3557 					}
   3558 					ctxt.Andptr[0] = byte(op)
   3559 					ctxt.Andptr = ctxt.Andptr[1:]
   3560 					ctxt.Andptr[0] = 0
   3561 					ctxt.Andptr = ctxt.Andptr[1:]
   3562 				} else if yt.zcase == Zloop {
   3563 					ctxt.Diag("loop too far: %v", p)
   3564 				} else {
   3565 					if yt.zcase == Zbr {
   3566 						ctxt.Andptr[0] = 0x0f
   3567 						ctxt.Andptr = ctxt.Andptr[1:]
   3568 					}
   3569 					ctxt.Andptr[0] = byte(o.op[z+1])
   3570 					ctxt.Andptr = ctxt.Andptr[1:]
   3571 					ctxt.Andptr[0] = 0
   3572 					ctxt.Andptr = ctxt.Andptr[1:]
   3573 					ctxt.Andptr[0] = 0
   3574 					ctxt.Andptr = ctxt.Andptr[1:]
   3575 					ctxt.Andptr[0] = 0
   3576 					ctxt.Andptr = ctxt.Andptr[1:]
   3577 					ctxt.Andptr[0] = 0
   3578 					ctxt.Andptr = ctxt.Andptr[1:]
   3579 				}
   3580 
   3581 				break
   3582 
   3583 			/*
   3584 				v = q->pc - p->pc - 2;
   3585 				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
   3586 					*ctxt->andptr++ = op;
   3587 					*ctxt->andptr++ = v;
   3588 				} else {
   3589 					v -= 5-2;
   3590 					if(yt.zcase == Zbr) {
   3591 						*ctxt->andptr++ = 0x0f;
   3592 						v--;
   3593 					}
   3594 					*ctxt->andptr++ = o->op[z+1];
   3595 					*ctxt->andptr++ = v;
   3596 					*ctxt->andptr++ = v>>8;
   3597 					*ctxt->andptr++ = v>>16;
   3598 					*ctxt->andptr++ = v>>24;
   3599 				}
   3600 			*/
   3601 
   3602 			case Zbyte:
   3603 				v = vaddr(ctxt, p, &p.From, &rel)
   3604 				if rel.Siz != 0 {
   3605 					rel.Siz = uint8(op)
   3606 					r = obj.Addrel(ctxt.Cursym)
   3607 					*r = rel
   3608 					r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
   3609 				}
   3610 
   3611 				ctxt.Andptr[0] = byte(v)
   3612 				ctxt.Andptr = ctxt.Andptr[1:]
   3613 				if op > 1 {
   3614 					ctxt.Andptr[0] = byte(v >> 8)
   3615 					ctxt.Andptr = ctxt.Andptr[1:]
   3616 					if op > 2 {
   3617 						ctxt.Andptr[0] = byte(v >> 16)
   3618 						ctxt.Andptr = ctxt.Andptr[1:]
   3619 						ctxt.Andptr[0] = byte(v >> 24)
   3620 						ctxt.Andptr = ctxt.Andptr[1:]
   3621 						if op > 4 {
   3622 							ctxt.Andptr[0] = byte(v >> 32)
   3623 							ctxt.Andptr = ctxt.Andptr[1:]
   3624 							ctxt.Andptr[0] = byte(v >> 40)
   3625 							ctxt.Andptr = ctxt.Andptr[1:]
   3626 							ctxt.Andptr[0] = byte(v >> 48)
   3627 							ctxt.Andptr = ctxt.Andptr[1:]
   3628 							ctxt.Andptr[0] = byte(v >> 56)
   3629 							ctxt.Andptr = ctxt.Andptr[1:]
   3630 						}
   3631 					}
   3632 				}
   3633 			}
   3634 
   3635 			return
   3636 		}
   3637 		z += int(yt.zoffset) + xo
   3638 	}
   3639 	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
   3640 		var pp obj.Prog
   3641 		var t []byte
   3642 		if p.As == mo[0].as {
   3643 			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
   3644 				t = mo[0].op[:]
   3645 				switch mo[0].code {
   3646 				default:
   3647 					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
   3648 
   3649 				case 0: /* lit */
   3650 					for z = 0; t[z] != E; z++ {
   3651 						ctxt.Andptr[0] = t[z]
   3652 						ctxt.Andptr = ctxt.Andptr[1:]
   3653 					}
   3654 
   3655 				case 1: /* r,m */
   3656 					ctxt.Andptr[0] = t[0]
   3657 					ctxt.Andptr = ctxt.Andptr[1:]
   3658 
   3659 					asmando(ctxt, p, &p.To, int(t[1]))
   3660 
   3661 				case 2: /* m,r */
   3662 					ctxt.Andptr[0] = t[0]
   3663 					ctxt.Andptr = ctxt.Andptr[1:]
   3664 
   3665 					asmando(ctxt, p, &p.From, int(t[1]))
   3666 
   3667 				case 3: /* r,m - 2op */
   3668 					ctxt.Andptr[0] = t[0]
   3669 					ctxt.Andptr = ctxt.Andptr[1:]
   3670 
   3671 					ctxt.Andptr[0] = t[1]
   3672 					ctxt.Andptr = ctxt.Andptr[1:]
   3673 					asmando(ctxt, p, &p.To, int(t[2]))
   3674 					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
   3675 
   3676 				case 4: /* m,r - 2op */
   3677 					ctxt.Andptr[0] = t[0]
   3678 					ctxt.Andptr = ctxt.Andptr[1:]
   3679 
   3680 					ctxt.Andptr[0] = t[1]
   3681 					ctxt.Andptr = ctxt.Andptr[1:]
   3682 					asmando(ctxt, p, &p.From, int(t[2]))
   3683 					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
   3684 
   3685 				case 5: /* load full pointer, trash heap */
   3686 					if t[0] != 0 {
   3687 						ctxt.Andptr[0] = t[0]
   3688 						ctxt.Andptr = ctxt.Andptr[1:]
   3689 					}
   3690 					switch p.To.Index {
   3691 					default:
   3692 						goto bad
   3693 
   3694 					case REG_DS:
   3695 						ctxt.Andptr[0] = 0xc5
   3696 						ctxt.Andptr = ctxt.Andptr[1:]
   3697 
   3698 					case REG_SS:
   3699 						ctxt.Andptr[0] = 0x0f
   3700 						ctxt.Andptr = ctxt.Andptr[1:]
   3701 						ctxt.Andptr[0] = 0xb2
   3702 						ctxt.Andptr = ctxt.Andptr[1:]
   3703 
   3704 					case REG_ES:
   3705 						ctxt.Andptr[0] = 0xc4
   3706 						ctxt.Andptr = ctxt.Andptr[1:]
   3707 
   3708 					case REG_FS:
   3709 						ctxt.Andptr[0] = 0x0f
   3710 						ctxt.Andptr = ctxt.Andptr[1:]
   3711 						ctxt.Andptr[0] = 0xb4
   3712 						ctxt.Andptr = ctxt.Andptr[1:]
   3713 
   3714 					case REG_GS:
   3715 						ctxt.Andptr[0] = 0x0f
   3716 						ctxt.Andptr = ctxt.Andptr[1:]
   3717 						ctxt.Andptr[0] = 0xb5
   3718 						ctxt.Andptr = ctxt.Andptr[1:]
   3719 					}
   3720 
   3721 					asmand(ctxt, p, &p.From, &p.To)
   3722 
   3723 				case 6: /* double shift */
   3724 					if t[0] == Pw {
   3725 						if p.Mode != 64 {
   3726 							ctxt.Diag("asmins: illegal 64: %v", p)
   3727 						}
   3728 						ctxt.Rexflag |= Pw
   3729 						t = t[1:]
   3730 					} else if t[0] == Pe {
   3731 						ctxt.Andptr[0] = Pe
   3732 						ctxt.Andptr = ctxt.Andptr[1:]
   3733 						t = t[1:]
   3734 					}
   3735 
   3736 					switch p.From.Type {
   3737 					default:
   3738 						goto bad
   3739 
   3740 					case obj.TYPE_CONST:
   3741 						ctxt.Andptr[0] = 0x0f
   3742 						ctxt.Andptr = ctxt.Andptr[1:]
   3743 						ctxt.Andptr[0] = t[0]
   3744 						ctxt.Andptr = ctxt.Andptr[1:]
   3745 						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
   3746 						ctxt.Andptr[0] = byte(p.From.Offset)
   3747 						ctxt.Andptr = ctxt.Andptr[1:]
   3748 
   3749 					case obj.TYPE_REG:
   3750 						switch p.From.Reg {
   3751 						default:
   3752 							goto bad
   3753 
   3754 						case REG_CL, REG_CX:
   3755 							ctxt.Andptr[0] = 0x0f
   3756 							ctxt.Andptr = ctxt.Andptr[1:]
   3757 							ctxt.Andptr[0] = t[1]
   3758 							ctxt.Andptr = ctxt.Andptr[1:]
   3759 							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
   3760 						}
   3761 					}
   3762 
   3763 				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
   3764 				// where you load the TLS base register into a register and then index off that
   3765 				// register to access the actual TLS variables. Systems that allow direct TLS access
   3766 				// are handled in prefixof above and should not be listed here.
   3767 				case 7: /* mov tls, r */
   3768 					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
   3769 						ctxt.Diag("invalid load of TLS: %v", p)
   3770 					}
   3771 
   3772 					if p.Mode == 32 {
   3773 						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
   3774 						// where you load the TLS base register into a register and then index off that
   3775 						// register to access the actual TLS variables. Systems that allow direct TLS access
   3776 						// are handled in prefixof above and should not be listed here.
   3777 						switch ctxt.Headtype {
   3778 						default:
   3779 							log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
   3780 
   3781 						case obj.Hlinux,
   3782 							obj.Hnacl:
   3783 							// ELF TLS base is 0(GS).
   3784 							pp.From = p.From
   3785 
   3786 							pp.From.Type = obj.TYPE_MEM
   3787 							pp.From.Reg = REG_GS
   3788 							pp.From.Offset = 0
   3789 							pp.From.Index = REG_NONE
   3790 							pp.From.Scale = 0
   3791 							ctxt.Andptr[0] = 0x65
   3792 							ctxt.Andptr = ctxt.Andptr[1:] // GS
   3793 							ctxt.Andptr[0] = 0x8B
   3794 							ctxt.Andptr = ctxt.Andptr[1:]
   3795 							asmand(ctxt, p, &pp.From, &p.To)
   3796 
   3797 						case obj.Hplan9:
   3798 							if ctxt.Plan9privates == nil {
   3799 								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
   3800 							}
   3801 							pp.From = obj.Addr{}
   3802 							pp.From.Type = obj.TYPE_MEM
   3803 							pp.From.Name = obj.NAME_EXTERN
   3804 							pp.From.Sym = ctxt.Plan9privates
   3805 							pp.From.Offset = 0
   3806 							pp.From.Index = REG_NONE
   3807 							ctxt.Andptr[0] = 0x8B
   3808 							ctxt.Andptr = ctxt.Andptr[1:]
   3809 							asmand(ctxt, p, &pp.From, &p.To)
   3810 
   3811 						case obj.Hwindows:
   3812 							// Windows TLS base is always 0x14(FS).
   3813 							pp.From = p.From
   3814 
   3815 							pp.From.Type = obj.TYPE_MEM
   3816 							pp.From.Reg = REG_FS
   3817 							pp.From.Offset = 0x14
   3818 							pp.From.Index = REG_NONE
   3819 							pp.From.Scale = 0
   3820 							ctxt.Andptr[0] = 0x64
   3821 							ctxt.Andptr = ctxt.Andptr[1:] // FS
   3822 							ctxt.Andptr[0] = 0x8B
   3823 							ctxt.Andptr = ctxt.Andptr[1:]
   3824 							asmand(ctxt, p, &pp.From, &p.To)
   3825 						}
   3826 						break
   3827 					}
   3828 
   3829 					switch ctxt.Headtype {
   3830 					default:
   3831 						log.Fatalf("unknown TLS base location for %s", obj.Headstr(ctxt.Headtype))
   3832 
   3833 					case obj.Hlinux:
   3834 						if ctxt.Flag_shared == 0 {
   3835 							log.Fatalf("unknown TLS base location for linux without -shared")
   3836 						}
   3837 						// Note that this is not generating the same insn as the other cases.
   3838 						//     MOV TLS, R_to
   3839 						// becomes
   3840 						//     movq g@gottpoff(%rip), R_to
   3841 						// which is encoded as
   3842 						//     movq 0(%rip), R_to
   3843 						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
   3844 						// is g, which we can't check here, but will when we assemble the second
   3845 						// instruction.
   3846 						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
   3847 
   3848 						ctxt.Andptr[0] = 0x8B
   3849 						ctxt.Andptr = ctxt.Andptr[1:]
   3850 						ctxt.Andptr[0] = byte(0x05 | (reg[p.To.Reg] << 3))
   3851 						ctxt.Andptr = ctxt.Andptr[1:]
   3852 						r = obj.Addrel(ctxt.Cursym)
   3853 						r.Off = int32(p.Pc + int64(-cap(ctxt.Andptr)+cap(ctxt.And[:])))
   3854 						r.Type = obj.R_TLS_IE
   3855 						r.Siz = 4
   3856 						r.Add = -4
   3857 						put4(ctxt, 0)
   3858 
   3859 					case obj.Hplan9:
   3860 						if ctxt.Plan9privates == nil {
   3861 							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
   3862 						}
   3863 						pp.From = obj.Addr{}
   3864 						pp.From.Type = obj.TYPE_MEM
   3865 						pp.From.Name = obj.NAME_EXTERN
   3866 						pp.From.Sym = ctxt.Plan9privates
   3867 						pp.From.Offset = 0
   3868 						pp.From.Index = REG_NONE
   3869 						ctxt.Rexflag |= Pw
   3870 						ctxt.Andptr[0] = 0x8B
   3871 						ctxt.Andptr = ctxt.Andptr[1:]
   3872 						asmand(ctxt, p, &pp.From, &p.To)
   3873 
   3874 					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
   3875 						// TLS base is 0(FS).
   3876 						pp.From = p.From
   3877 
   3878 						pp.From.Type = obj.TYPE_MEM
   3879 						pp.From.Name = obj.NAME_NONE
   3880 						pp.From.Reg = REG_NONE
   3881 						pp.From.Offset = 0
   3882 						pp.From.Index = REG_NONE
   3883 						pp.From.Scale = 0
   3884 						ctxt.Rexflag |= Pw
   3885 						ctxt.Andptr[0] = 0x64
   3886 						ctxt.Andptr = ctxt.Andptr[1:] // FS
   3887 						ctxt.Andptr[0] = 0x8B
   3888 						ctxt.Andptr = ctxt.Andptr[1:]
   3889 						asmand(ctxt, p, &pp.From, &p.To)
   3890 
   3891 					case obj.Hwindows:
   3892 						// Windows TLS base is always 0x28(GS).
   3893 						pp.From = p.From
   3894 
   3895 						pp.From.Type = obj.TYPE_MEM
   3896 						pp.From.Name = obj.NAME_NONE
   3897 						pp.From.Reg = REG_GS
   3898 						pp.From.Offset = 0x28
   3899 						pp.From.Index = REG_NONE
   3900 						pp.From.Scale = 0
   3901 						ctxt.Rexflag |= Pw
   3902 						ctxt.Andptr[0] = 0x65
   3903 						ctxt.Andptr = ctxt.Andptr[1:] // GS
   3904 						ctxt.Andptr[0] = 0x8B
   3905 						ctxt.Andptr = ctxt.Andptr[1:]
   3906 						asmand(ctxt, p, &pp.From, &p.To)
   3907 					}
   3908 				}
   3909 				return
   3910 			}
   3911 		}
   3912 	}
   3913 	goto bad
   3914 
   3915 bad:
   3916 	if p.Mode != 64 {
   3917 		/*
   3918 		 * here, the assembly has failed.
   3919 		 * if its a byte instruction that has
   3920 		 * unaddressable registers, try to
   3921 		 * exchange registers and reissue the
   3922 		 * instruction with the operands renamed.
   3923 		 */
   3924 		pp := *p
   3925 
   3926 		unbytereg(&pp.From, &pp.Ft)
   3927 		unbytereg(&pp.To, &pp.Tt)
   3928 
   3929 		z := int(p.From.Reg)
   3930 		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
   3931 			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
   3932 			// For now, different to keep bit-for-bit compatibility.
   3933 			if p.Mode == 32 {
   3934 				breg := byteswapreg(ctxt, &p.To)
   3935 				if breg != REG_AX {
   3936 					ctxt.Andptr[0] = 0x87
   3937 					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
   3938 					asmando(ctxt, p, &p.From, reg[breg])
   3939 					subreg(&pp, z, breg)
   3940 					doasm(ctxt, &pp)
   3941 					ctxt.Andptr[0] = 0x87
   3942 					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
   3943 					asmando(ctxt, p, &p.From, reg[breg])
   3944 				} else {
   3945 					ctxt.Andptr[0] = byte(0x90 + reg[z])
   3946 					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
   3947 					subreg(&pp, z, REG_AX)
   3948 					doasm(ctxt, &pp)
   3949 					ctxt.Andptr[0] = byte(0x90 + reg[z])
   3950 					ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
   3951 				}
   3952 				return
   3953 			}
   3954 
   3955 			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
   3956 				// We certainly don't want to exchange
   3957 				// with AX if the op is MUL or DIV.
   3958 				ctxt.Andptr[0] = 0x87
   3959 				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
   3960 				asmando(ctxt, p, &p.From, reg[REG_BX])
   3961 				subreg(&pp, z, REG_BX)
   3962 				doasm(ctxt, &pp)
   3963 				ctxt.Andptr[0] = 0x87
   3964 				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lhs,bx */
   3965 				asmando(ctxt, p, &p.From, reg[REG_BX])
   3966 			} else {
   3967 				ctxt.Andptr[0] = byte(0x90 + reg[z])
   3968 				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
   3969 				subreg(&pp, z, REG_AX)
   3970 				doasm(ctxt, &pp)
   3971 				ctxt.Andptr[0] = byte(0x90 + reg[z])
   3972 				ctxt.Andptr = ctxt.Andptr[1:] /* xchg lsh,ax */
   3973 			}
   3974 			return
   3975 		}
   3976 
   3977 		z = int(p.To.Reg)
   3978 		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
   3979 			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
   3980 			// For now, different to keep bit-for-bit compatibility.
   3981 			if p.Mode == 32 {
   3982 				breg := byteswapreg(ctxt, &p.From)
   3983 				if breg != REG_AX {
   3984 					ctxt.Andptr[0] = 0x87
   3985 					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
   3986 					asmando(ctxt, p, &p.To, reg[breg])
   3987 					subreg(&pp, z, breg)
   3988 					doasm(ctxt, &pp)
   3989 					ctxt.Andptr[0] = 0x87
   3990 					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
   3991 					asmando(ctxt, p, &p.To, reg[breg])
   3992 				} else {
   3993 					ctxt.Andptr[0] = byte(0x90 + reg[z])
   3994 					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
   3995 					subreg(&pp, z, REG_AX)
   3996 					doasm(ctxt, &pp)
   3997 					ctxt.Andptr[0] = byte(0x90 + reg[z])
   3998 					ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
   3999 				}
   4000 				return
   4001 			}
   4002 
   4003 			if isax(&p.From) {
   4004 				ctxt.Andptr[0] = 0x87
   4005 				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
   4006 				asmando(ctxt, p, &p.To, reg[REG_BX])
   4007 				subreg(&pp, z, REG_BX)
   4008 				doasm(ctxt, &pp)
   4009 				ctxt.Andptr[0] = 0x87
   4010 				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rhs,bx */
   4011 				asmando(ctxt, p, &p.To, reg[REG_BX])
   4012 			} else {
   4013 				ctxt.Andptr[0] = byte(0x90 + reg[z])
   4014 				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
   4015 				subreg(&pp, z, REG_AX)
   4016 				doasm(ctxt, &pp)
   4017 				ctxt.Andptr[0] = byte(0x90 + reg[z])
   4018 				ctxt.Andptr = ctxt.Andptr[1:] /* xchg rsh,ax */
   4019 			}
   4020 			return
   4021 		}
   4022 	}
   4023 
   4024 	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
   4025 	return
   4026 }
   4027 
   4028 // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
   4029 // which is not referenced in a.
   4030 // If a is empty, it returns BX to account for MULB-like instructions
   4031 // that might use DX and AX.
   4032 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
   4033 	cand := 1
   4034 	canc := cand
   4035 	canb := canc
   4036 	cana := canb
   4037 
   4038 	if a.Type == obj.TYPE_NONE {
   4039 		cand = 0
   4040 		cana = cand
   4041 	}
   4042 
   4043 	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
   4044 		switch a.Reg {
   4045 		case REG_NONE:
   4046 			cand = 0
   4047 			cana = cand
   4048 
   4049 		case REG_AX, REG_AL, REG_AH:
   4050 			cana = 0
   4051 
   4052 		case REG_BX, REG_BL, REG_BH:
   4053 			canb = 0
   4054 
   4055 		case REG_CX, REG_CL, REG_CH:
   4056 			canc = 0
   4057 
   4058 		case REG_DX, REG_DL, REG_DH:
   4059 			cand = 0
   4060 		}
   4061 	}
   4062 
   4063 	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
   4064 		switch a.Index {
   4065 		case REG_AX:
   4066 			cana = 0
   4067 
   4068 		case REG_BX:
   4069 			canb = 0
   4070 
   4071 		case REG_CX:
   4072 			canc = 0
   4073 
   4074 		case REG_DX:
   4075 			cand = 0
   4076 		}
   4077 	}
   4078 
   4079 	if cana != 0 {
   4080 		return REG_AX
   4081 	}
   4082 	if canb != 0 {
   4083 		return REG_BX
   4084 	}
   4085 	if canc != 0 {
   4086 		return REG_CX
   4087 	}
   4088 	if cand != 0 {
   4089 		return REG_DX
   4090 	}
   4091 
   4092 	ctxt.Diag("impossible byte register")
   4093 	log.Fatalf("bad code")
   4094 	return 0
   4095 }
   4096 
   4097 func isbadbyte(a *obj.Addr) bool {
   4098 	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
   4099 }
   4100 
   4101 var naclret = []uint8{
   4102 	0x5e, // POPL SI
   4103 	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
   4104 	0x83,
   4105 	0xe6,
   4106 	0xe0, // ANDL $~31, SI
   4107 	0x4c,
   4108 	0x01,
   4109 	0xfe, // ADDQ R15, SI
   4110 	0xff,
   4111 	0xe6, // JMP SI
   4112 }
   4113 
   4114 var naclret8 = []uint8{
   4115 	0x5d, // POPL BP
   4116 	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
   4117 	0x83,
   4118 	0xe5,
   4119 	0xe0, // ANDL $~31, BP
   4120 	0xff,
   4121 	0xe5, // JMP BP
   4122 }
   4123 
   4124 var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
   4125 
   4126 var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
   4127 
   4128 var naclmovs = []uint8{
   4129 	0x89,
   4130 	0xf6, // MOVL SI, SI
   4131 	0x49,
   4132 	0x8d,
   4133 	0x34,
   4134 	0x37, // LEAQ (R15)(SI*1), SI
   4135 	0x89,
   4136 	0xff, // MOVL DI, DI
   4137 	0x49,
   4138 	0x8d,
   4139 	0x3c,
   4140 	0x3f, // LEAQ (R15)(DI*1), DI
   4141 }
   4142 
   4143 var naclstos = []uint8{
   4144 	0x89,
   4145 	0xff, // MOVL DI, DI
   4146 	0x49,
   4147 	0x8d,
   4148 	0x3c,
   4149 	0x3f, // LEAQ (R15)(DI*1), DI
   4150 }
   4151 
   4152 func nacltrunc(ctxt *obj.Link, reg int) {
   4153 	if reg >= REG_R8 {
   4154 		ctxt.Andptr[0] = 0x45
   4155 		ctxt.Andptr = ctxt.Andptr[1:]
   4156 	}
   4157 	reg = (reg - REG_AX) & 7
   4158 	ctxt.Andptr[0] = 0x89
   4159 	ctxt.Andptr = ctxt.Andptr[1:]
   4160 	ctxt.Andptr[0] = byte(3<<6 | reg<<3 | reg)
   4161 	ctxt.Andptr = ctxt.Andptr[1:]
   4162 }
   4163 
   4164 func asmins(ctxt *obj.Link, p *obj.Prog) {
   4165 	ctxt.Andptr = ctxt.And[:]
   4166 	ctxt.Asmode = int(p.Mode)
   4167 
   4168 	if p.As == obj.AUSEFIELD {
   4169 		r := obj.Addrel(ctxt.Cursym)
   4170 		r.Off = 0
   4171 		r.Siz = 0
   4172 		r.Sym = p.From.Sym
   4173 		r.Type = obj.R_USEFIELD
   4174 		return
   4175 	}
   4176 
   4177 	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
   4178 		switch p.As {
   4179 		case obj.ARET:
   4180 			copy(ctxt.Andptr, naclret8)
   4181 			ctxt.Andptr = ctxt.Andptr[len(naclret8):]
   4182 			return
   4183 
   4184 		case obj.ACALL,
   4185 			obj.AJMP:
   4186 			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
   4187 				ctxt.Andptr[0] = 0x83
   4188 				ctxt.Andptr = ctxt.Andptr[1:]
   4189 				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
   4190 				ctxt.Andptr = ctxt.Andptr[1:]
   4191 				ctxt.Andptr[0] = 0xe0
   4192 				ctxt.Andptr = ctxt.Andptr[1:]
   4193 			}
   4194 
   4195 		case AINT:
   4196 			ctxt.Andptr[0] = 0xf4
   4197 			ctxt.Andptr = ctxt.Andptr[1:]
   4198 			return
   4199 		}
   4200 	}
   4201 
   4202 	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
   4203 		if p.As == AREP {
   4204 			ctxt.Rep++
   4205 			return
   4206 		}
   4207 
   4208 		if p.As == AREPN {
   4209 			ctxt.Repn++
   4210 			return
   4211 		}
   4212 
   4213 		if p.As == ALOCK {
   4214 			ctxt.Lock++
   4215 			return
   4216 		}
   4217 
   4218 		if p.As != ALEAQ && p.As != ALEAL {
   4219 			if p.From.Index != obj.TYPE_NONE && p.From.Scale > 0 {
   4220 				nacltrunc(ctxt, int(p.From.Index))
   4221 			}
   4222 			if p.To.Index != obj.TYPE_NONE && p.To.Scale > 0 {
   4223 				nacltrunc(ctxt, int(p.To.Index))
   4224 			}
   4225 		}
   4226 
   4227 		switch p.As {
   4228 		case obj.ARET:
   4229 			copy(ctxt.Andptr, naclret)
   4230 			ctxt.Andptr = ctxt.Andptr[len(naclret):]
   4231 			return
   4232 
   4233 		case obj.ACALL,
   4234 			obj.AJMP:
   4235 			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
   4236 				// ANDL $~31, reg
   4237 				ctxt.Andptr[0] = 0x83
   4238 				ctxt.Andptr = ctxt.Andptr[1:]
   4239 
   4240 				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_AX))
   4241 				ctxt.Andptr = ctxt.Andptr[1:]
   4242 				ctxt.Andptr[0] = 0xe0
   4243 				ctxt.Andptr = ctxt.Andptr[1:]
   4244 
   4245 				// ADDQ R15, reg
   4246 				ctxt.Andptr[0] = 0x4c
   4247 				ctxt.Andptr = ctxt.Andptr[1:]
   4248 
   4249 				ctxt.Andptr[0] = 0x01
   4250 				ctxt.Andptr = ctxt.Andptr[1:]
   4251 				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_AX))
   4252 				ctxt.Andptr = ctxt.Andptr[1:]
   4253 			}
   4254 
   4255 			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
   4256 				// ANDL $~31, reg
   4257 				ctxt.Andptr[0] = 0x41
   4258 				ctxt.Andptr = ctxt.Andptr[1:]
   4259 
   4260 				ctxt.Andptr[0] = 0x83
   4261 				ctxt.Andptr = ctxt.Andptr[1:]
   4262 				ctxt.Andptr[0] = byte(0xe0 | (p.To.Reg - REG_R8))
   4263 				ctxt.Andptr = ctxt.Andptr[1:]
   4264 				ctxt.Andptr[0] = 0xe0
   4265 				ctxt.Andptr = ctxt.Andptr[1:]
   4266 
   4267 				// ADDQ R15, reg
   4268 				ctxt.Andptr[0] = 0x4d
   4269 				ctxt.Andptr = ctxt.Andptr[1:]
   4270 
   4271 				ctxt.Andptr[0] = 0x01
   4272 				ctxt.Andptr = ctxt.Andptr[1:]
   4273 				ctxt.Andptr[0] = byte(0xf8 | (p.To.Reg - REG_R8))
   4274 				ctxt.Andptr = ctxt.Andptr[1:]
   4275 			}
   4276 
   4277 		case AINT:
   4278 			ctxt.Andptr[0] = 0xf4
   4279 			ctxt.Andptr = ctxt.Andptr[1:]
   4280 			return
   4281 
   4282 		case ASCASB,
   4283 			ASCASW,
   4284 			ASCASL,
   4285 			ASCASQ,
   4286 			ASTOSB,
   4287 			ASTOSW,
   4288 			ASTOSL,
   4289 			ASTOSQ:
   4290 			copy(ctxt.Andptr, naclstos)
   4291 			ctxt.Andptr = ctxt.Andptr[len(naclstos):]
   4292 
   4293 		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
   4294 			copy(ctxt.Andptr, naclmovs)
   4295 			ctxt.Andptr = ctxt.Andptr[len(naclmovs):]
   4296 		}
   4297 
   4298 		if ctxt.Rep != 0 {
   4299 			ctxt.Andptr[0] = 0xf3
   4300 			ctxt.Andptr = ctxt.Andptr[1:]
   4301 			ctxt.Rep = 0
   4302 		}
   4303 
   4304 		if ctxt.Repn != 0 {
   4305 			ctxt.Andptr[0] = 0xf2
   4306 			ctxt.Andptr = ctxt.Andptr[1:]
   4307 			ctxt.Repn = 0
   4308 		}
   4309 
   4310 		if ctxt.Lock != 0 {
   4311 			ctxt.Andptr[0] = 0xf0
   4312 			ctxt.Andptr = ctxt.Andptr[1:]
   4313 			ctxt.Lock = 0
   4314 		}
   4315 	}
   4316 
   4317 	ctxt.Rexflag = 0
   4318 	and0 := ctxt.Andptr
   4319 	ctxt.Asmode = int(p.Mode)
   4320 	doasm(ctxt, p)
   4321 	if ctxt.Rexflag != 0 {
   4322 		/*
   4323 		 * as befits the whole approach of the architecture,
   4324 		 * the rex prefix must appear before the first opcode byte
   4325 		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
   4326 		 * before the 0f opcode escape!), or it might be ignored.
   4327 		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
   4328 		 */
   4329 		if p.Mode != 64 {
   4330 			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
   4331 		}
   4332 		n := -cap(ctxt.Andptr) + cap(and0)
   4333 		var c int
   4334 		var np int
   4335 		for np = 0; np < n; np++ {
   4336 			c = int(and0[np])
   4337 			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
   4338 				break
   4339 			}
   4340 		}
   4341 
   4342 		copy(and0[np+1:], and0[np:n])
   4343 		and0[np] = byte(0x40 | ctxt.Rexflag)
   4344 		ctxt.Andptr = ctxt.Andptr[1:]
   4345 	}
   4346 
   4347 	n := -cap(ctxt.Andptr) + cap(ctxt.And[:])
   4348 	var r *obj.Reloc
   4349 	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
   4350 		r = &ctxt.Cursym.R[i:][0]
   4351 		if int64(r.Off) < p.Pc {
   4352 			break
   4353 		}
   4354 		if ctxt.Rexflag != 0 {
   4355 			r.Off++
   4356 		}
   4357 		if r.Type == obj.R_PCREL {
   4358 			// PC-relative addressing is relative to the end of the instruction,
   4359 			// but the relocations applied by the linker are relative to the end
   4360 			// of the relocation. Because immediate instruction
   4361 			// arguments can follow the PC-relative memory reference in the
   4362 			// instruction encoding, the two may not coincide. In this case,
   4363 			// adjust addend so that linker can keep relocating relative to the
   4364 			// end of the relocation.
   4365 			r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
   4366 		}
   4367 	}
   4368 
   4369 	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
   4370 		switch p.To.Reg {
   4371 		case REG_SP:
   4372 			copy(ctxt.Andptr, naclspfix)
   4373 			ctxt.Andptr = ctxt.Andptr[len(naclspfix):]
   4374 
   4375 		case REG_BP:
   4376 			copy(ctxt.Andptr, naclbpfix)
   4377 			ctxt.Andptr = ctxt.Andptr[len(naclbpfix):]
   4378 		}
   4379 	}
   4380 }
   4381