Home | History | Annotate | Download | only in x86
      1 // Inferno utils/6l/span.c
      2 // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
      3 //
      4 //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
      5 //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
      6 //	Portions Copyright © 1997-1999 Vita Nuova Limited
      7 //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
      8 //	Portions Copyright © 2004,2006 Bruce Ellis
      9 //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
     10 //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
     11 //	Portions Copyright © 2009 The Go Authors. All rights reserved.
     12 //
     13 // Permission is hereby granted, free of charge, to any person obtaining a copy
     14 // of this software and associated documentation files (the "Software"), to deal
     15 // in the Software without restriction, including without limitation the rights
     16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     17 // copies of the Software, and to permit persons to whom the Software is
     18 // furnished to do so, subject to the following conditions:
     19 //
     20 // The above copyright notice and this permission notice shall be included in
     21 // all copies or substantial portions of the Software.
     22 //
     23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
     26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     29 // THE SOFTWARE.
     30 
     31 package x86
     32 
     33 import (
     34 	"cmd/internal/obj"
     35 	"encoding/binary"
     36 	"fmt"
     37 	"log"
     38 	"strings"
     39 )
     40 
     41 // Instruction layout.
     42 
     43 const (
     44 	// Loop alignment constants:
     45 	// want to align loop entry to LoopAlign-byte boundary,
     46 	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
     47 	// We define a loop entry as the target of a backward jump.
     48 	//
     49 	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
     50 	// and it aligns all jump targets, not just backward jump targets.
     51 	//
     52 	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
     53 	// is very slight but negative, so the alignment is disabled by
     54 	// setting MaxLoopPad = 0. The code is here for reference and
     55 	// for future experiments.
     56 	//
     57 	LoopAlign  = 16 // boundary, in bytes, that a loop entry would be aligned to
     58 	MaxLoopPad = 0  // most NOP bytes to insert per loop entry; 0 disables loop alignment
     59 	funcAlign  = 16 // NOTE(review): not covered by the comment above; presumably the byte alignment of function entries — confirm at its uses
     60 )
     61 
     62 type Optab struct { // one row of the optab table: how to encode the instruction named by as
     63 	as     obj.As // instruction this row describes
     64 	ytab   []ytab // accepted operand forms, scanned in order; nil in rows such as obj.AXXX and AADJSP
     65 	prefix uint8 // one of the P* constants (Px, Pe, Pm, Pw, Pvex, ...); controls the prefix bytes to emit
     66 	op     [23]uint8 // the "z bytes": opcode data, handed out to the ytab lines in order according to their zoffset
     67 }
     68 
     69 type ytab struct { // one accepted operand form of an instruction (one line of a y* table)
     70 	from    uint8 // Y* class that the from operand must satisfy
     71 	from3   uint8 // Y* class of the extra middle operand; Ynone in the two-operand tables
     72 	to      uint8 // Y* class that the to operand must satisfy
     73 	zcase   uint8 // Z* constant selecting how the instruction bytes are laid down
     74 	zoffset uint8 // how far z advances into Optab.op when this line does not match; 0 makes the next line share the same z bytes
     75 }
     76 
     77 type Movtab struct { // NOTE(review): no Movtab values appear in this part of the file; the field notes below are inferred from the parallel ytab field names — confirm
     78 	as   obj.As // instruction the entry applies to
     79 	ft   uint8 // presumably the class of the from operand (cf. ytab.from)
     80 	f3t  uint8 // presumably the class of the middle operand (cf. ytab.from3)
     81 	tt   uint8 // presumably the class of the to operand (cf. ytab.to)
     82 	code uint8 // presumably selects the encoding recipe — TODO confirm
     83 	op   [4]uint8 // up to four bytes of opcode data
     84 }
     85 
     86 const ( // operand classes ("Ytypes"): the values stored in ytab.from/from3/to and used to index ycover
     87 	Yxxx = iota
     88 	Ynone // used in the y* tables for an operand position that is not present
     89 	Yi0 // $0
     90 	Yi1 // $1
     91 	Yi8 // $x, x fits in int8
     92 	Yu8 // $x, x fits in uint8
     93 	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
     94 	Ys32 // "signed 32" per the optab comment; Yi0, Yi1 and Yi8 all count as Ys32
     95 	Yi32
     96 	Yi64
     97 	Yiauto
     98 	Yal
     99 	Ycl
    100 	Yax
    101 	Ycx
    102 	Yrb
    103 	Yrl
    104 	Yrl32 // Yrl on 32-bit system
    105 	Yrf
    106 	Yf0
    107 	Yrx
    108 	Ymb
    109 	Yml
    110 	Ym
    111 	Ybr
    112 	Ycs
    113 	Yss
    114 	Yds
    115 	Yes
    116 	Yfs
    117 	Ygs
    118 	Ygdtr
    119 	Yidtr
    120 	Yldtr
    121 	Ymsw
    122 	Ytask
    123 	Ycr0
    124 	Ycr1
    125 	Ycr2
    126 	Ycr3
    127 	Ycr4
    128 	Ycr5
    129 	Ycr6
    130 	Ycr7
    131 	Ycr8
    132 	Ydr0
    133 	Ydr1
    134 	Ydr2
    135 	Ydr3
    136 	Ydr4
    137 	Ydr5
    138 	Ydr6
    139 	Ydr7
    140 	Ytr0
    141 	Ytr1
    142 	Ytr2
    143 	Ytr3
    144 	Ytr4
    145 	Ytr5
    146 	Ytr6
    147 	Ytr7
    148 	Ymr // presumably an MMX register, judging by the "MMX MOVQ/MOVD" lines in ymovq — confirm
    149 	Ymm
    150 	Yxr // xmm register operand (the xmmV/xmm1 positions in the yvex_xy3 comment)
    151 	Yxm // xmm register or memory operand (the xmm2/m128 position in the yvex_xy3 comment)
    152 	Yyr // ymm register operand (the ymmV/ymm1 positions in the yvex_xy3 comment)
    153 	Yym // ymm register or memory operand (the ymm2/m256 position in the yvex_xy3 comment)
    154 	Ytls
    155 	Ytextsize
    156 	Yindir
    157 	Ymax // number of classes; ycover holds Ymax*Ymax entries
    158 )
    159 
    160 const ( // encoding cases ("Ztypes"): the values stored in ytab.zcase
    161 	Zxxx = iota
    162 	Zlit
    163 	Zlitm_r
    164 	Z_rp
    165 	Zbr
    166 	Zcall
    167 	Zcallcon
    168 	Zcallduff
    169 	Zcallind
    170 	Zcallindreg
    171 	Zib_
    172 	Zib_rp
    173 	Zibo_m // per the optab comment: an opcode byte, then the encoded addressing mode of the operand, then one immediate byte
    174 	Zibo_m_xm
    175 	Zil_
    176 	Zil_rp
    177 	Ziq_rp
    178 	Zilo_m // per the optab comment: same as Zibo_m but with a long (32-bit) immediate
    179 	Zjmp
    180 	Zjmpcon
    181 	Zloop
    182 	Zo_iw
    183 	Zm_o
    184 	Zm_r
    185 	Zm2_r
    186 	Zm_r_xm
    187 	Zm_r_i_xm
    188 	Zm_r_xm_nr
    189 	Zr_m_xm_nr
    190 	Zibm_r /* mmx1,mmx2/mem64,imm8 */
    191 	Zibr_m
    192 	Zmb_r
    193 	Zaut_r
    194 	Zo_m
    195 	Zo_m64
    196 	Zpseudo
    197 	Zr_m
    198 	Zr_m_xm
    199 	Zrp_
    200 	Z_ib
    201 	Z_il
    202 	Zm_ibo
    203 	Zm_ilo
    204 	Zib_rr
    205 	Zil_rr
    206 	Zclr
    207 	Zbyte
    208 	Zvex_rm_v_r
    209 	Zvex_r_v_rm
    210 	Zvex_v_rm_r
    211 	Zvex_i_rm_r
    212 	Zvex_i_r_v
    213 	Zvex_i_rm_v_r
    214 	Zmax // last value of the enumeration; not a real encoding case
    215 )
    216 
    217 const ( // P*: values for Optab.prefix, selecting the prefix/escape bytes of a row; Rx*: bit masks (note Pw, "Rex.w", is 0x40|Rxw)
    218 	Px   = 0
    219 	Px1  = 1    // symbolic; exact value doesn't matter
    220 	P32  = 0x32 /* 32-bit only */
    221 	Pe   = 0x66 /* operand escape */
    222 	Pm   = 0x0f /* 2byte opcode escape */
    223 	Pq   = 0xff /* both escapes: 66 0f */
    224 	Pb   = 0xfe /* byte operands */
    225 	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
    226 	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
    227 	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
    228 	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
    229 	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
    230 	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
    231 	Pw   = 0x48 /* Rex.w */
    232 	Pw8  = 0x90 // symbolic; exact value doesn't matter
    233 	Py   = 0x80 /* defaults to 64-bit mode */
    234 	Py1  = 0x81 // symbolic; exact value doesn't matter
    235 	Py3  = 0x83 // symbolic; exact value doesn't matter
    236 	Pvex = 0x84 // symbolic; exact value doesn't matter
    237 
    238 	Rxw = 1 << 3 /* =1, 64-bit operand size */
    239 	Rxr = 1 << 2 /* extend modrm reg */
    240 	Rxx = 1 << 1 /* extend sib index */
    241 	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
    242 )
    243 
    244 const (
    245 	// Encoding for VEX prefix in tables.
    246 	// The P, L, and W fields are chosen to match
    247 	// their eventual locations in the VEX prefix bytes.
    248 	// Within one table byte: P is bits 0-1, L is bit 2, M is bits 3-4, W is bit 7.
    249 	// P field - 2 bits
    250 	vex66 = 1 << 0
    251 	vexF3 = 2 << 0
    252 	vexF2 = 3 << 0
    253 	// L field - 1 bit
    254 	vexLZ  = 0 << 2 // LZ, LIG and 128 all encode as L=0
    255 	vexLIG = 0 << 2
    256 	vex128 = 0 << 2
    257 	vex256 = 1 << 2
    258 	// W field - 1 bit
    259 	vexWIG = 0 << 7 // WIG and W0 both encode as W=0
    260 	vexW0  = 0 << 7
    261 	vexW1  = 1 << 7
    262 	// M field - 5 bits, but mostly reserved; we can store up to 4
    263 	vex0F   = 1 << 3
    264 	vex0F38 = 2 << 3
    265 	vex0F3A = 3 << 3
    266 
    267 	// Combinations used in the manual.
    268 	VEX_128_0F_WIG      = vex128 | vex0F | vexWIG
    269 	VEX_128_66_0F_W0    = vex128 | vex66 | vex0F | vexW0
    270 	VEX_128_66_0F_W1    = vex128 | vex66 | vex0F | vexW1
    271 	VEX_128_66_0F_WIG   = vex128 | vex66 | vex0F | vexWIG
    272 	VEX_128_66_0F38_W0  = vex128 | vex66 | vex0F38 | vexW0
    273 	VEX_128_66_0F38_W1  = vex128 | vex66 | vex0F38 | vexW1
    274 	VEX_128_66_0F38_WIG = vex128 | vex66 | vex0F38 | vexWIG
    275 	VEX_128_66_0F3A_W0  = vex128 | vex66 | vex0F3A | vexW0
    276 	VEX_128_66_0F3A_W1  = vex128 | vex66 | vex0F3A | vexW1
    277 	VEX_128_66_0F3A_WIG = vex128 | vex66 | vex0F3A | vexWIG
    278 	VEX_128_F2_0F_WIG   = vex128 | vexF2 | vex0F | vexWIG
    279 	VEX_128_F3_0F_WIG   = vex128 | vexF3 | vex0F | vexWIG
    280 	VEX_256_66_0F_WIG   = vex256 | vex66 | vex0F | vexWIG
    281 	VEX_256_66_0F38_W0  = vex256 | vex66 | vex0F38 | vexW0
    282 	VEX_256_66_0F38_W1  = vex256 | vex66 | vex0F38 | vexW1
    283 	VEX_256_66_0F38_WIG = vex256 | vex66 | vex0F38 | vexWIG
    284 	VEX_256_66_0F3A_W0  = vex256 | vex66 | vex0F3A | vexW0
    285 	VEX_256_66_0F3A_W1  = vex256 | vex66 | vex0F3A | vexW1
    286 	VEX_256_66_0F3A_WIG = vex256 | vex66 | vex0F3A | vexWIG
    287 	VEX_256_F2_0F_WIG   = vex256 | vexF2 | vex0F | vexWIG
    288 	VEX_256_F3_0F_WIG   = vex256 | vexF3 | vex0F | vexWIG
    289 	VEX_LIG_0F_WIG      = vexLIG | vex0F | vexWIG
    290 	VEX_LIG_66_0F_WIG   = vexLIG | vex66 | vex0F | vexWIG
    291 	VEX_LIG_66_0F38_W0  = vexLIG | vex66 | vex0F38 | vexW0
    292 	VEX_LIG_66_0F38_W1  = vexLIG | vex66 | vex0F38 | vexW1
    293 	VEX_LIG_66_0F3A_WIG = vexLIG | vex66 | vex0F3A | vexWIG
    294 	VEX_LIG_F2_0F_W0    = vexLIG | vexF2 | vex0F | vexW0
    295 	VEX_LIG_F2_0F_W1    = vexLIG | vexF2 | vex0F | vexW1
    296 	VEX_LIG_F2_0F_WIG   = vexLIG | vexF2 | vex0F | vexWIG
    297 	VEX_LIG_F3_0F_W0    = vexLIG | vexF3 | vex0F | vexW0
    298 	VEX_LIG_F3_0F_W1    = vexLIG | vexF3 | vex0F | vexW1
    299 	VEX_LIG_F3_0F_WIG   = vexLIG | vexF3 | vex0F | vexWIG
    300 	VEX_LZ_0F_WIG       = vexLZ | vex0F | vexWIG
    301 	VEX_LZ_0F38_W0      = vexLZ | vex0F38 | vexW0
    302 	VEX_LZ_0F38_W1      = vexLZ | vex0F38 | vexW1
    303 	VEX_LZ_66_0F38_W0   = vexLZ | vex66 | vex0F38 | vexW0
    304 	VEX_LZ_66_0F38_W1   = vexLZ | vex66 | vex0F38 | vexW1
    305 	VEX_LZ_F2_0F38_W0   = vexLZ | vexF2 | vex0F38 | vexW0
    306 	VEX_LZ_F2_0F38_W1   = vexLZ | vexF2 | vex0F38 | vexW1
    307 	VEX_LZ_F2_0F3A_W0   = vexLZ | vexF2 | vex0F3A | vexW0
    308 	VEX_LZ_F2_0F3A_W1   = vexLZ | vexF2 | vex0F3A | vexW1
    309 	VEX_LZ_F3_0F38_W0   = vexLZ | vexF3 | vex0F38 | vexW0
    310 	VEX_LZ_F3_0F38_W1   = vexLZ | vexF3 | vex0F38 | vexW1
    311 )
    312 
    313 var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] = 1 means an operand of class a also counts as class b; set up in instinit per the comment above optab
    314 
    315 var reg [MAXREG]int // NOTE(review): per-register table sized by MAXREG; it is filled in elsewhere, contents not visible here
    316 
    317 var regrex [MAXREG + 1]int // NOTE(review): per-register table with one extra slot; presumably REX bits per register given the name — confirm
    318 
    319 var ynone = []ytab{ // no operands, Zlit case; used by optab rows such as AAAA, ACDQ and ACLC
    320 	{Ynone, Ynone, Ynone, Zlit, 1},
    321 }
    322 
    323 var ytext = []ytab{ // Zpseudo forms: Ymb with Ytextsize, optionally with a Yi32 middle operand
    324 	{Ymb, Ynone, Ytextsize, Zpseudo, 0},
    325 	{Ymb, Yi32, Ytextsize, Zpseudo, 1},
    326 }
    327 
    328 var ynop = []ytab{ // Zpseudo forms: no operand at all, or one operand in either the to or the from position
    329 	{Ynone, Ynone, Ynone, Zpseudo, 0},
    330 	{Ynone, Ynone, Yiauto, Zpseudo, 0},
    331 	{Ynone, Ynone, Yml, Zpseudo, 0},
    332 	{Ynone, Ynone, Yrf, Zpseudo, 0},
    333 	{Ynone, Ynone, Yxr, Zpseudo, 0},
    334 	{Yiauto, Ynone, Ynone, Zpseudo, 0},
    335 	{Yml, Ynone, Ynone, Zpseudo, 0},
    336 	{Yrf, Ynone, Ynone, Zpseudo, 0},
    337 	{Yxr, Ynone, Ynone, Zpseudo, 1},
    338 }
    339 
    340 var yfuncdata = []ytab{ // Zpseudo form: Yi32 from, Ym to
    341 	{Yi32, Ynone, Ym, Zpseudo, 0},
    342 }
    343 
    344 var ypcdata = []ytab{ // Zpseudo form: Yi32 from, Yi32 to
    345 	{Yi32, Ynone, Yi32, Zpseudo, 0},
    346 }
    347 
    348 var yxorb = []ytab{ // used by the AADCB, AADDB and AANDB rows of optab; the zoffsets add up to the 5 op bytes those rows list
    349 	{Yi32, Ynone, Yal, Zib_, 1},
    350 	{Yi32, Ynone, Ymb, Zibo_m, 2},
    351 	{Yrb, Ynone, Ymb, Zr_m, 1},
    352 	{Ymb, Ynone, Yrb, Zm_r, 1},
    353 }
    354 
    355 var yaddl = []ytab{ // the worked example in the comment above optab; used by the ADC/ADD/AND L, Q and W rows (7 op bytes)
    356 	{Yi8, Ynone, Yml, Zibo_m, 2},
    357 	{Yi32, Ynone, Yax, Zil_, 1},
    358 	{Yi32, Ynone, Yml, Zilo_m, 2},
    359 	{Yrl, Ynone, Yml, Zr_m, 1},
    360 	{Yml, Ynone, Yrl, Zm_r, 1},
    361 }
    362 
    363 var yincl = []ytab{ // single to operand: Yrl through Z_rp, or Yml through Zo_m
    364 	{Ynone, Ynone, Yrl, Z_rp, 1},
    365 	{Ynone, Ynone, Yml, Zo_m, 2},
    366 }
    367 
    368 var yincq = []ytab{ // yincl without the Z_rp line
    369 	{Ynone, Ynone, Yml, Zo_m, 2},
    370 }
    371 
    372 var ycmpb = []ytab{ // same shapes as yxorb with from and to exchanged (the immediate is the to operand)
    373 	{Yal, Ynone, Yi32, Z_ib, 1},
    374 	{Ymb, Ynone, Yi32, Zm_ibo, 2},
    375 	{Ymb, Ynone, Yrb, Zm_r, 1},
    376 	{Yrb, Ynone, Ymb, Zr_m, 1},
    377 }
    378 
    379 var ycmpl = []ytab{ // same shapes as yaddl with from and to exchanged
    380 	{Yml, Ynone, Yi8, Zm_ibo, 2},
    381 	{Yax, Ynone, Yi32, Z_il, 1},
    382 	{Yml, Ynone, Yi32, Zm_ilo, 2},
    383 	{Yml, Ynone, Yrl, Zm_r, 1},
    384 	{Yrl, Ynone, Yml, Zr_m, 1},
    385 }
    386 
    387 var yshb = []ytab{ // Ymb to operand with a from operand of class Yi1, Yi32 or Ycx
    388 	{Yi1, Ynone, Ymb, Zo_m, 2},
    389 	{Yi32, Ynone, Ymb, Zibo_m, 2},
    390 	{Ycx, Ynone, Ymb, Zo_m, 2},
    391 }
    392 
    393 var yshl = []ytab{ // like yshb but for Yml, and additionally accepting Ycl as the from operand
    394 	{Yi1, Ynone, Yml, Zo_m, 2},
    395 	{Yi32, Ynone, Yml, Zibo_m, 2},
    396 	{Ycl, Ynone, Yml, Zo_m, 2},
    397 	{Ycx, Ynone, Yml, Zo_m, 2},
    398 }
    399 
    400 var ytestl = []ytab{ // yaddl without its leading Yi8 line
    401 	{Yi32, Ynone, Yax, Zil_, 1},
    402 	{Yi32, Ynone, Yml, Zilo_m, 2},
    403 	{Yrl, Ynone, Yml, Zr_m, 1},
    404 	{Yml, Ynone, Yrl, Zm_r, 1},
    405 }
    406 
    407 var ymovb = []ytab{ // Yrb/Ymb moves in both directions, plus Yi32 into Yrb (Zib_rp) or Ymb (Zibo_m)
    408 	{Yrb, Ynone, Ymb, Zr_m, 1},
    409 	{Ymb, Ynone, Yrb, Zm_r, 1},
    410 	{Yi32, Ynone, Yrb, Zib_rp, 1},
    411 	{Yi32, Ynone, Ymb, Zibo_m, 2},
    412 }
    413 
    414 var ybtl = []ytab{ // used by the ABT*, ABTC*, ABTR* and ABTS* rows of optab
    415 	{Yi8, Ynone, Yml, Zibo_m, 2},
    416 	{Yrl, Ynone, Yml, Zr_m, 1},
    417 }
    418 
    419 var ymovw = []ytab{ // same lines as ymovl minus the MMX/XMM MOVD forms
    420 	{Yrl, Ynone, Yml, Zr_m, 1},
    421 	{Yml, Ynone, Yrl, Zm_r, 1},
    422 	{Yi0, Ynone, Yrl, Zclr, 1},
    423 	{Yi32, Ynone, Yrl, Zil_rp, 1},
    424 	{Yi32, Ynone, Yml, Zilo_m, 2},
    425 	{Yiauto, Ynone, Yrl, Zaut_r, 2},
    426 }
    427 
    428 var ymovl = []ytab{ // general moves plus the MMX/XMM MOVD forms noted on the lines below
    429 	{Yrl, Ynone, Yml, Zr_m, 1},
    430 	{Yml, Ynone, Yrl, Zm_r, 1},
    431 	{Yi0, Ynone, Yrl, Zclr, 1},
    432 	{Yi32, Ynone, Yrl, Zil_rp, 1},
    433 	{Yi32, Ynone, Yml, Zilo_m, 2},
    434 	{Yml, Ynone, Ymr, Zm_r_xm, 1}, // MMX MOVD
    435 	{Ymr, Ynone, Yml, Zr_m_xm, 1}, // MMX MOVD
    436 	{Yml, Ynone, Yxr, Zm_r_xm, 2}, // XMM MOVD (32 bit)
    437 	{Yxr, Ynone, Yml, Zr_m_xm, 2}, // XMM MOVD (32 bit)
    438 	{Yiauto, Ynone, Yrl, Zaut_r, 2},
    439 }
    440 
    441 var yret = []ytab{ // no operand, or a single Yi32 from operand; both lines use the Zo_iw case
    442 	{Ynone, Ynone, Ynone, Zo_iw, 1},
    443 	{Yi32, Ynone, Ynone, Zo_iw, 1},
    444 }
    445 
    446 var ymovq = []ytab{ // 64-bit moves; the per-line comments give the prefix/opcode each form is expected to find in the op bytes
    447 	// valid in 32-bit mode
    448 	{Ym, Ynone, Ymr, Zm_r_xm_nr, 1},  // 0x6f MMX MOVQ (shorter encoding)
    449 	{Ymr, Ynone, Ym, Zr_m_xm_nr, 1},  // 0x7f MMX MOVQ
    450 	{Yxr, Ynone, Ymr, Zm_r_xm_nr, 2}, // Pf2, 0xd6 MOVDQ2Q
    451 	{Yxm, Ynone, Yxr, Zm_r_xm_nr, 2}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
    452 	{Yxr, Ynone, Yxm, Zr_m_xm_nr, 2}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
    453 
    454 	// valid only in 64-bit mode, usually with 64-bit prefix
    455 	{Yrl, Ynone, Yml, Zr_m, 1},      // 0x89
    456 	{Yml, Ynone, Yrl, Zm_r, 1},      // 0x8b
    457 	{Yi0, Ynone, Yrl, Zclr, 1},      // 0x31
    458 	{Ys32, Ynone, Yrl, Zilo_m, 2},   // 32 bit signed 0xc7,(0)
    459 	{Yi64, Ynone, Yrl, Ziq_rp, 1},   // 0xb8 -- 32/64 bit immediate
    460 	{Yi32, Ynone, Yml, Zilo_m, 2},   // 0xc7,(0)
    461 	{Ymm, Ynone, Ymr, Zm_r_xm, 1},   // 0x6e MMX MOVD
    462 	{Ymr, Ynone, Ymm, Zr_m_xm, 1},   // 0x7e MMX MOVD
    463 	{Yml, Ynone, Yxr, Zm_r_xm, 2},   // Pe, 0x6e MOVD xmm load
    464 	{Yxr, Ynone, Yml, Zr_m_xm, 2},   // Pe, 0x7e MOVD xmm store
    465 	{Yiauto, Ynone, Yrl, Zaut_r, 1}, // 0 built-in LEAQ
    466 }
    467 
    468 var ym_rl = []ytab{ // Ym from, Yrl to
    469 	{Ym, Ynone, Yrl, Zm_r, 1},
    470 }
    471 
    472 var yrl_m = []ytab{ // Yrl from, Ym to; used by the ABOUNDL and ABOUNDW rows of optab
    473 	{Yrl, Ynone, Ym, Zr_m, 1},
    474 }
    475 
    476 var ymb_rl = []ytab{ // Ymb from, Yrl to, with the dedicated Zmb_r case
    477 	{Ymb, Ynone, Yrl, Zmb_r, 1},
    478 }
    479 
    480 var yml_rl = []ytab{ // Yml from, Yrl to; used by the ABSF*, ABSR* and ACMOV* rows of optab
    481 	{Yml, Ynone, Yrl, Zm_r, 1},
    482 }
    483 
    484 var yrl_ml = []ytab{ // Yrl from, Yml to; used by the AARPL row of optab
    485 	{Yrl, Ynone, Yml, Zr_m, 1},
    486 }
    487 
    488 var yml_mb = []ytab{ // NOTE: despite the name, both lines pair Yrb with Ymb (one in each direction)
    489 	{Yrb, Ynone, Ymb, Zr_m, 1},
    490 	{Ymb, Ynone, Yrb, Zm_r, 1},
    491 }
    492 
    493 var yrb_mb = []ytab{ // Yrb from, Ymb to
    494 	{Yrb, Ynone, Ymb, Zr_m, 1},
    495 }
    496 
    497 var yxchg = []ytab{ // Yax paired with Yrl in either order (Z_rp / Zrp_), then the general Yrl/Yml forms
    498 	{Yax, Ynone, Yrl, Z_rp, 1},
    499 	{Yrl, Ynone, Yax, Zrp_, 1},
    500 	{Yrl, Ynone, Yml, Zr_m, 1},
    501 	{Yml, Ynone, Yrl, Zm_r, 1},
    502 }
    503 
    504 var ydivl = []ytab{ // single Yml from operand, Zm_o case
    505 	{Yml, Ynone, Ynone, Zm_o, 2},
    506 }
    507 
    508 var ydivb = []ytab{ // byte counterpart of ydivl: single Ymb from operand
    509 	{Ymb, Ynone, Ynone, Zm_o, 2},
    510 }
    511 
    512 var yimul = []ytab{ // one-operand form (as ydivl), immediate-into-Yrl forms, and Yml into Yrl
    513 	{Yml, Ynone, Ynone, Zm_o, 2},
    514 	{Yi8, Ynone, Yrl, Zib_rr, 1},
    515 	{Yi32, Ynone, Yrl, Zil_rr, 1},
    516 	{Yml, Ynone, Yrl, Zm_r, 2},
    517 }
    518 
    519 var yimul3 = []ytab{ // three-operand form: Yi8 from, Yml middle operand, Yrl to
    520 	{Yi8, Yml, Yrl, Zibm_r, 2},
    521 }
    522 
    523 var ybyte = []ytab{ // single Yi64 from operand, Zbyte case; used by the ABYTE row of optab
    524 	{Yi64, Ynone, Ynone, Zbyte, 1},
    525 }
    526 
    527 var yin = []ytab{ // a Yi32 from operand (Zib_), or no operand (Zlit)
    528 	{Yi32, Ynone, Ynone, Zib_, 1},
    529 	{Ynone, Ynone, Ynone, Zlit, 1},
    530 }
    531 
    532 var yint = []ytab{ // yin without the operand-less line
    533 	{Yi32, Ynone, Ynone, Zib_, 1},
    534 }
    535 
    536 var ypushl = []ytab{ // single from operand: Yrl, Ym, Yi8 or Yi32
    537 	{Yrl, Ynone, Ynone, Zrp_, 1},
    538 	{Ym, Ynone, Ynone, Zm_o, 2},
    539 	{Yi8, Ynone, Ynone, Zib_, 1},
    540 	{Yi32, Ynone, Ynone, Zil_, 1},
    541 }
    542 
    543 var ypopl = []ytab{ // single to operand: Yrl or Ym
    544 	{Ynone, Ynone, Yrl, Z_rp, 1},
    545 	{Ynone, Ynone, Ym, Zo_m, 2},
    546 }
    547 
    548 var ybswap = []ytab{ // single Yrl to operand; used by the ABSWAPL and ABSWAPQ rows of optab
    549 	{Ynone, Ynone, Yrl, Z_rp, 2},
    550 }
    551 
    552 var yscond = []ytab{ // single Ymb to operand
    553 	{Ynone, Ynone, Ymb, Zo_m, 2},
    554 }
    555 
    556 var yjcond = []ytab{ // Ybr target with an optional Yi0/Yi1 from operand; the zoffset-0 lines share the z bytes of the last line
    557 	{Ynone, Ynone, Ybr, Zbr, 0},
    558 	{Yi0, Ynone, Ybr, Zbr, 0},
    559 	{Yi1, Ynone, Ybr, Zbr, 1},
    560 }
    561 
    562 var yloop = []ytab{ // Ybr target, Zloop case
    563 	{Ynone, Ynone, Ybr, Zloop, 1},
    564 }
    565 
    566 var ycall = []ytab{ // used by the obj.ACALL row of optab; each zoffset-0 line shares its z bytes with the line after it
    567 	{Ynone, Ynone, Yml, Zcallindreg, 0},
    568 	{Yrx, Ynone, Yrx, Zcallindreg, 2},
    569 	{Ynone, Ynone, Yindir, Zcallind, 2},
    570 	{Ynone, Ynone, Ybr, Zcall, 0},
    571 	{Ynone, Ynone, Yi32, Zcallcon, 1},
    572 }
    573 
    574 var yduff = []ytab{ // single Yi32 to operand, Zcallduff case
    575 	{Ynone, Ynone, Yi32, Zcallduff, 1},
    576 }
    577 
    578 var yjmp = []ytab{ // to operand of class Yml (Zo_m64), Ybr (Zjmp) or Yi32 (Zjmpcon); the Ybr line shares z bytes with the Yi32 line
    579 	{Ynone, Ynone, Yml, Zo_m64, 2},
    580 	{Ynone, Ynone, Ybr, Zjmp, 0},
    581 	{Ynone, Ynone, Yi32, Zjmpcon, 1},
    582 }
    583 
    584 var yfmvd = []ytab{ // Yf0 exchanged with Ym or Yrf, in both directions
    585 	{Ym, Ynone, Yf0, Zm_o, 2},
    586 	{Yf0, Ynone, Ym, Zo_m, 2},
    587 	{Yrf, Ynone, Yf0, Zm_o, 2},
    588 	{Yf0, Ynone, Yrf, Zo_m, 2},
    589 }
    590 
    591 var yfmvdp = []ytab{ // Yf0 from only: to Ym or to Yrf
    592 	{Yf0, Ynone, Ym, Zo_m, 2},
    593 	{Yf0, Ynone, Yrf, Zo_m, 2},
    594 }
    595 
    596 var yfmvf = []ytab{ // Yf0 exchanged with Ym only, in both directions
    597 	{Ym, Ynone, Yf0, Zm_o, 2},
    598 	{Yf0, Ynone, Ym, Zo_m, 2},
    599 }
    600 
    601 var yfmvx = []ytab{ // Ym from, Yf0 to
    602 	{Ym, Ynone, Yf0, Zm_o, 2},
    603 }
    604 
    605 var yfmvp = []ytab{ // Yf0 from, Ym to
    606 	{Yf0, Ynone, Ym, Zo_m, 2},
    607 }
    608 
    609 var yfcmv = []ytab{ // Yrf from, Yf0 to
    610 	{Yrf, Ynone, Yf0, Zm_o, 2},
    611 }
    612 
    613 var yfadd = []ytab{ // Ym or Yrf into Yf0, or Yf0 into Yrf
    614 	{Ym, Ynone, Yf0, Zm_o, 2},
    615 	{Yrf, Ynone, Yf0, Zm_o, 2},
    616 	{Yf0, Ynone, Yrf, Zo_m, 2},
    617 }
    618 
    619 var yfxch = []ytab{ // Yf0 and Yrf in either order
    620 	{Yf0, Ynone, Yrf, Zo_m, 2},
    621 	{Yrf, Ynone, Yf0, Zm_o, 2},
    622 }
    623 
    624 var ycompp = []ytab{ // Yf0 from, Yrf to (see the caveat on the line below)
    625 	{Yf0, Ynone, Yrf, Zo_m, 2}, /* botch is really f0,f1 */
    626 }
    627 
    628 var ystsw = []ytab{ // single to operand: Ym (Zo_m) or Yax (Zlit)
    629 	{Ynone, Ynone, Ym, Zo_m, 2},
    630 	{Ynone, Ynone, Yax, Zlit, 1},
    631 }
    632 
    633 var ysvrs = []ytab{ // a single Ym operand, accepted in either the to or the from position
    634 	{Ynone, Ynone, Ym, Zo_m, 2},
    635 	{Ym, Ynone, Ynone, Zm_o, 2},
    636 }
    637 
    638 var ymm = []ytab{ // Ymm into Ymr, or Yxm into Yxr
    639 	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
    640 	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
    641 }
    642 
    643 var yxm = []ytab{ // Yxm into Yxr; used by optab rows such as AADDPD, AADDPS and AANDPD
    644 	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
    645 }
    646 
    647 var yxm_q4 = []ytab{ // same operands as yxm but with the plain Zm_r case
    648 	{Yxm, Ynone, Yxr, Zm_r, 1},
    649 }
    650 
    651 var yxcvm1 = []ytab{ // Yxm from; to is Yxr or Ymr
    652 	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
    653 	{Yxm, Ynone, Ymr, Zm_r_xm, 2},
    654 }
    655 
    656 var yxcvm2 = []ytab{ // Yxr to; from is Yxm or Ymm
    657 	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
    658 	{Ymm, Ynone, Yxr, Zm_r_xm, 2},
    659 }
    660 
    661 var yxr = []ytab{ // Yxr into Yxr
    662 	{Yxr, Ynone, Yxr, Zm_r_xm, 1},
    663 }
    664 
    665 var yxr_ml = []ytab{ // Yxr from, Yml to
    666 	{Yxr, Ynone, Yml, Zr_m_xm, 1},
    667 }
    668 
    669 var ymr = []ytab{ // Ymr into Ymr
    670 	{Ymr, Ynone, Ymr, Zm_r, 1},
    671 }
    672 
    673 var ymr_ml = []ytab{ // Ymr from, Yml to
    674 	{Ymr, Ynone, Yml, Zr_m_xm, 1},
    675 }
    676 
    677 var yxcmpi = []ytab{ // three-operand form: Yxm from, Yxr middle operand, Yi8 to
    678 	{Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
    679 }
    680 
    681 var yxmov = []ytab{ // Yxm/Yxr moves in both directions
    682 	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
    683 	{Yxr, Ynone, Yxm, Zr_m_xm, 1},
    684 }
    685 
    686 var yxcvfl = []ytab{ // Yxm from, Yrl to
    687 	{Yxm, Ynone, Yrl, Zm_r_xm, 1},
    688 }
    689 
    690 var yxcvlf = []ytab{ // Yml from, Yxr to
    691 	{Yml, Ynone, Yxr, Zm_r_xm, 1},
    692 }
    693 
    694 var yxcvfq = []ytab{ // same operands as yxcvfl, but with zoffset 2
    695 	{Yxm, Ynone, Yrl, Zm_r_xm, 2},
    696 }
    697 
    698 var yxcvqf = []ytab{ // same operands as yxcvlf, but with zoffset 2
    699 	{Yml, Ynone, Yxr, Zm_r_xm, 2},
    700 }
    701 
    702 var yps = []ytab{ // Ymm or Yi8 into Ymr, then Yxm or Yi8 into Yxr
    703 	{Ymm, Ynone, Ymr, Zm_r_xm, 1},
    704 	{Yi8, Ynone, Ymr, Zibo_m_xm, 2},
    705 	{Yxm, Ynone, Yxr, Zm_r_xm, 2},
    706 	{Yi8, Ynone, Yxr, Zibo_m_xm, 3},
    707 }
    708 
    709 var yxrrl = []ytab{ // Yxr from, Yrl to
    710 	{Yxr, Ynone, Yrl, Zm_r, 1},
    711 }
    712 
    713 var ymrxr = []ytab{ // Ymr into Yxr, or Yxm into Yxr
    714 	{Ymr, Ynone, Yxr, Zm_r, 1},
    715 	{Yxm, Ynone, Yxr, Zm_r_xm, 1},
    716 }
    717 
    718 var ymshuf = []ytab{ // three-operand form: Yi8 from, Ymm middle operand, Ymr to
    719 	{Yi8, Ymm, Ymr, Zibm_r, 2},
    720 }
    721 
    722 var ymshufb = []ytab{ // Yxm into Yxr with the Zm2_r case
    723 	{Yxm, Ynone, Yxr, Zm2_r, 2},
    724 }
    725 
    726 var yxshuf = []ytab{ // three-operand form: Yu8 from, Yxm middle operand, Yxr to
    727 	{Yu8, Yxm, Yxr, Zibm_r, 2},
    728 }
    729 
    730 var yextrw = []ytab{ // three-operand form: Yu8 from, Yxr middle operand, Yrl to
    731 	{Yu8, Yxr, Yrl, Zibm_r, 2},
    732 }
    733 
    734 var yextr = []ytab{ // three-operand form: Yu8 from, Yxr middle operand, Ymm to (Zibr_m)
    735 	{Yu8, Yxr, Ymm, Zibr_m, 3},
    736 }
    737 
    738 var yinsrw = []ytab{ // three-operand form: Yu8 from, Yml middle operand, Yxr to
    739 	{Yu8, Yml, Yxr, Zibm_r, 2},
    740 }
    741 
    742 var yinsr = []ytab{ // three-operand form: Yu8 from, Ymm middle operand, Yxr to
    743 	{Yu8, Ymm, Yxr, Zibm_r, 3},
    744 }
    745 
    746 var ypsdq = []ytab{ // Yi8 from, Yxr to
    747 	{Yi8, Ynone, Yxr, Zibo_m, 2},
    748 }
    749 
    750 var ymskb = []ytab{ // Yxr or Ymr from, Yrl to
    751 	{Yxr, Ynone, Yrl, Zm_r_xm, 2},
    752 	{Ymr, Ynone, Yrl, Zm_r_xm, 1},
    753 }
    754 
    755 var ycrc32l = []ytab{ // Yml from, Yrl to, Zlitm_r case
    756 	{Yml, Ynone, Yrl, Zlitm_r, 0},
    757 }
    758 
    759 var yprefetch = []ytab{ // single Ym from operand
    760 	{Ym, Ynone, Ynone, Zm_o, 2},
    761 }
    762 
    763 var yaes = []ytab{ // Yxm from, Yxr to, Zlitm_r case
    764 	{Yxm, Ynone, Yxr, Zlitm_r, 2},
    765 }
    766 
    767 var yxbegin = []ytab{ // Ybr target, Zjmp case
    768 	{Ynone, Ynone, Ybr, Zjmp, 1},
    769 }
    770 
    771 var yxabort = []ytab{ // single Yu8 from operand
    772 	{Yu8, Ynone, Ynone, Zib_, 1},
    773 }
    774 
    775 var ylddqu = []ytab{ // Ym from, Yxr to
    776 	{Ym, Ynone, Yxr, Zm_r, 1},
    777 }
    778 
    779 // VEX instructions that come in two forms:
    780 //	VTHING xmm2/m128, xmmV, xmm1
    781 //	VTHING ymm2/m256, ymmV, ymm1
    782 // The opcode array in the corresponding Optab entry
    783 // should contain the (VEX prefixes, opcode byte) pair
    784 // for each of the two forms.
    785 // For example, the entries for VPXOR are:
    786 //
    787 //	VPXOR xmm2/m128, xmmV, xmm1
    788 //	VEX.NDS.128.66.0F.WIG EF /r
    789 //
    790 //	VPXOR ymm2/m256, ymmV, ymm1
    791 //	VEX.NDS.256.66.0F.WIG EF /r
    792 //
    793 // The NDS/NDD/DDS part can be dropped, producing this
    794 // Optab entry:
    795 //
    796 //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}}
    797 //
    798 var yvex_xy3 = []ytab{
    799 	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2}, // xmm2/m128, xmmV, xmm1 form; owns the first (VEX prefix, opcode) pair
    800 	{Yym, Yyr, Yyr, Zvex_rm_v_r, 2}, // ymm2/m256, ymmV, ymm1 form; owns the second pair
    801 }
    802 
    803 var yvex_ri3 = []ytab{ // three-operand form: Yi8 from, Ymb middle operand, Yrl to
    804 	{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
    805 }
    806 
    807 var yvex_xyi3 = []ytab{ // immediate (Yu8 lines first, then Yi8) with xmm or ymm middle and to operands
    808 	{Yu8, Yxm, Yxr, Zvex_i_rm_r, 2},
    809 	{Yu8, Yym, Yyr, Zvex_i_rm_r, 2},
    810 	{Yi8, Yxm, Yxr, Zvex_i_rm_r, 2},
    811 	{Yi8, Yym, Yyr, Zvex_i_rm_r, 2},
    812 }
    813 
    814 var yvex_yyi4 = []ytab{ //TODO don't hide 4 op, some version have xmm version
    815 	{Yym, Yyr, Yyr, Zvex_i_rm_v_r, 2},
    816 }
    817 
    818 var yvex_xyi4 = []ytab{ // like yvex_yyi4 but with a Yxm from operand
    819 	{Yxm, Yyr, Yyr, Zvex_i_rm_v_r, 2},
    820 }
    821 
    822 var yvex_shift = []ytab{ // Yi8 forms (Zvex_i_r_v) followed by Yxm forms (Zvex_rm_v_r), each for xmm and ymm registers
    823 	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
    824 	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
    825 	{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
    826 	{Yxm, Yyr, Yyr, Zvex_rm_v_r, 2},
    827 }
    828 
    829 var yvex_shift_dq = []ytab{ // only the two Yi8 lines of yvex_shift
    830 	{Yi8, Yxr, Yxr, Zvex_i_r_v, 3},
    831 	{Yi8, Yyr, Yyr, Zvex_i_r_v, 3},
    832 }
    833 
    834 var yvex_r3 = []ytab{ // three-operand form: Yml from, Yrl middle operand, Yrl to
    835 	{Yml, Yrl, Yrl, Zvex_rm_v_r, 2},
    836 }
    837 
    838 var yvex_vmr3 = []ytab{ // yvex_r3 with the from and middle classes exchanged (Zvex_v_rm_r)
    839 	{Yrl, Yml, Yrl, Zvex_v_rm_r, 2},
    840 }
    841 
    842 var yvex_xy2 = []ytab{ // two-operand counterpart of yvex_xy3: no middle operand
    843 	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
    844 	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
    845 }
    846 
    847 var yvex_xyr2 = []ytab{ // Yxr or Yyr from, Yrl to
    848 	{Yxr, Ynone, Yrl, Zvex_rm_v_r, 2},
    849 	{Yyr, Ynone, Yrl, Zvex_rm_v_r, 2},
    850 }
    851 
    852 var yvex_vmovdqa = []ytab{ // moves in both directions, first the xmm pair of lines and then the ymm pair
    853 	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
    854 	{Yxr, Ynone, Yxm, Zvex_r_v_rm, 2},
    855 	{Yym, Ynone, Yyr, Zvex_rm_v_r, 2},
    856 	{Yyr, Ynone, Yym, Zvex_r_v_rm, 2},
    857 }
    858 
    859 var yvex_vmovntdq = []ytab{ // Yxr or Yyr from, Ym to
    860 	{Yxr, Ynone, Ym, Zvex_r_v_rm, 2},
    861 	{Yyr, Ynone, Ym, Zvex_r_v_rm, 2},
    862 }
    863 
    864 var yvex_vpbroadcast = []ytab{ // Yxm from in both lines; to is Yxr or Yyr
    865 	{Yxm, Ynone, Yxr, Zvex_rm_v_r, 2},
    866 	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
    867 }
    868 
    869 var yvex_vpbroadcast_sd = []ytab{ // only the Yyr line of yvex_vpbroadcast
    870 	{Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
    871 }
    872 
    873 var ymmxmm0f38 = []ytab{ // Zlitm_r forms: Ymm into Ymr (3 z bytes) or Yxm into Yxr (5 z bytes)
    874 	{Ymm, Ynone, Ymr, Zlitm_r, 3},
    875 	{Yxm, Ynone, Yxr, Zlitm_r, 5},
    876 }
    877 
    878 /*
    879  * You are doasm, holding in your hand a Prog* with p->as set to, say, ACRC32,
    880  * and p->from and p->to as operands (Addr*).  The linker scans optab to find
    881  * the entry with the given p->as and then looks through the ytable for that
    882  * instruction (the second field in the optab struct) for a line whose from
    883  * and to values match the Ytypes of the p->from and p->to operands (in the Go ytab struct a from3 value sits between them, Ynone when unused).  The function
    884  * oclass in span.c computes the specific Ytype of an operand and then the set
    885  * of more general Ytypes that it satisfies is implied by the ycover table, set
    886  * up in instinit.  For example, oclass distinguishes the constants 0 and 1
    887  * from the more general 8-bit constants, but instinit says
    888  *
    889  *        ycover[Yi0*Ymax + Ys32] = 1;
    890  *        ycover[Yi1*Ymax + Ys32] = 1;
    891  *        ycover[Yi8*Ymax + Ys32] = 1;
    892  *
    893  * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
    894  * if that's what an instruction can handle.
    895  *
    896  * In parallel with the scan through the ytable for the appropriate line, there
    897  * is a z pointer that starts out pointing at the strange magic byte list in
    898  * the Optab struct.  With each step past a non-matching ytable line, z
    899  * advances by the line's zoffset (the 4th entry in the C layout shown below).  When a matching line is found, that
    900  * z pointer has the extra data to use in laying down the instruction bytes.
    901  * The actual bytes laid down are a function of the line's zcase (that
    902  * is, the Ztype; the 3rd entry in the C layout shown below) and the z bytes.
    903  *
    904  * For example, let's look at AADDL.  The optab line says:
    905  *        { AADDL,        yaddl,  Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },
    906  *
    907  * and yaddl says
    908  *        uchar   yaddl[] =
    909  *        {
    910  *                Yi8,    Yml,    Zibo_m, 2,
    911  *                Yi32,   Yax,    Zil_,   1,
    912  *                Yi32,   Yml,    Zilo_m, 2,
    913  *                Yrl,    Yml,    Zr_m,   1,
    914  *                Yml,    Yrl,    Zm_r,   1,
    915  *                0
    916  *        };
    917  *
    918  * so there are 5 possible types of ADDL instruction that can be laid down, and
    919  * possible states used to lay them down (Ztype and z pointer, assuming z
    920  * points at {0x83,(00),0x05,0x81,(00),0x01,0x03}) are:
    921  *
    922  *        Yi8, Yml -> Zibo_m, z (0x83, 00)
    923  *        Yi32, Yax -> Zil_, z+2 (0x05)
    924  *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
    925  *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
    926  *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
    927  *
    928  * The Pconstant in the optab line controls the prefix bytes to emit.  That's
    929  * relatively straightforward as this program goes.
    930  *
    931  * The switch on t[2] in doasm implements the various Z cases.  Zibo_m, for
    932  * example, is an opcode byte (z[0]) then an asmando (which is some kind of
    933  * encoded addressing mode for the Yml arg), and then a single immediate byte.
    934  * Zilo_m is the same but a long (32-bit) immediate.
    935  */
    936 var optab =
    937 /*	as, ytab, andproto, opcode */
    938 []Optab{
    939 	{obj.AXXX, nil, 0, [23]uint8{}},
    940 	{AAAA, ynone, P32, [23]uint8{0x37}},
    941 	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
    942 	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
    943 	{AAAS, ynone, P32, [23]uint8{0x3f}},
    944 	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x10}},
    945 	{AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
    946 	{AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
    947 	{AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
    948 	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
    949 	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
    950 	{AADDPD, yxm, Pq, [23]uint8{0x58}},
    951 	{AADDPS, yxm, Pm, [23]uint8{0x58}},
    952 	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
    953 	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
    954 	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
    955 	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
    956 	{AADJSP, nil, 0, [23]uint8{}},
    957 	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
    958 	{AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
    959 	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
    960 	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
    961 	{AANDPD, yxm, Pq, [23]uint8{0x54}},
    962 	{AANDPS, yxm, Pq, [23]uint8{0x54}},
    963 	{AANDQ, yaddl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
    964 	{AANDW, yaddl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
    965 	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
    966 	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
    967 	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
    968 	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
    969 	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
    970 	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
    971 	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
    972 	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
    973 	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
    974 	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
    975 	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
    976 	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
    977 	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
    978 	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
    979 	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
    980 	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
    981 	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
    982 	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
    983 	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
    984 	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
    985 	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
    986 	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
    987 	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
    988 	{ABYTE, ybyte, Px, [23]uint8{1}},
    989 	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
    990 	{ACDQ, ynone, Px, [23]uint8{0x99}},
    991 	{ACLC, ynone, Px, [23]uint8{0xf8}},
    992 	{ACLD, ynone, Px, [23]uint8{0xfc}},
    993 	{ACLI, ynone, Px, [23]uint8{0xfa}},
    994 	{ACLTS, ynone, Pm, [23]uint8{0x06}},
    995 	{ACMC, ynone, Px, [23]uint8{0xf5}},
    996 	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
    997 	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
    998 	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
    999 	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
   1000 	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
   1001 	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
   1002 	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
   1003 	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
   1004 	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
   1005 	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
   1006 	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
   1007 	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
   1008 	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
   1009 	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
   1010 	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
   1011 	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
   1012 	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
   1013 	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
   1014 	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
   1015 	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
   1016 	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
   1017 	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
   1018 	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
   1019 	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
   1020 	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
   1021 	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
   1022 	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
   1023 	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
   1024 	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
   1025 	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
   1026 	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
   1027 	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
   1028 	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
   1029 	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
   1030 	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
   1031 	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
   1032 	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
   1033 	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
   1034 	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
   1035 	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
   1036 	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
   1037 	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
   1038 	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
   1039 	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
   1040 	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
   1041 	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
   1042 	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
   1043 	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
   1044 	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
   1045 	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   1046 	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
   1047 	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
   1048 	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   1049 	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
   1050 	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
   1051 	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
   1052 	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
   1053 	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
   1054 	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
   1055 	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   1056 	{ACOMISD, yxm, Pe, [23]uint8{0x2f}},
   1057 	{ACOMISS, yxm, Pm, [23]uint8{0x2f}},
   1058 	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
   1059 	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
   1060 	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
   1061 	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
   1062 	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
   1063 	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
   1064 	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
   1065 	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
   1066 	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
   1067 	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
   1068 	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
   1069 	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
   1070 	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
   1071 	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
   1072 	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
   1073 	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
   1074 	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
   1075 	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
   1076 	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
   1077 	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
   1078 	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
   1079 	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
   1080 	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
   1081 	{ACWD, ynone, Pe, [23]uint8{0x99}},
   1082 	{ACQO, ynone, Pw, [23]uint8{0x99}},
   1083 	{ADAA, ynone, P32, [23]uint8{0x27}},
   1084 	{ADAS, ynone, P32, [23]uint8{0x2f}},
   1085 	{ADECB, yscond, Pb, [23]uint8{0xfe, 01}},
   1086 	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
   1087 	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
   1088 	{ADECW, yincq, Pe, [23]uint8{0xff, 01}},
   1089 	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
   1090 	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
   1091 	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
   1092 	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
   1093 	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
   1094 	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
   1095 	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
   1096 	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
   1097 	{AEMMS, ynone, Pm, [23]uint8{0x77}},
   1098 	{AENTER, nil, 0, [23]uint8{}}, /* botch */
   1099 	{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
   1100 	{AFXSAVE, ysvrs, Pm, [23]uint8{0xae, 00, 0xae, 00}},
   1101 	{AFXRSTOR64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
   1102 	{AFXSAVE64, ysvrs, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
   1103 	{AHLT, ynone, Px, [23]uint8{0xf4}},
   1104 	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
   1105 	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
   1106 	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
   1107 	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
   1108 	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
   1109 	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   1110 	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   1111 	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   1112 	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
   1113 	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
   1114 	{AINCB, yscond, Pb, [23]uint8{0xfe, 00}},
   1115 	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
   1116 	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
   1117 	{AINCW, yincq, Pe, [23]uint8{0xff, 00}},
   1118 	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
   1119 	{AINSB, ynone, Pb, [23]uint8{0x6c}},
   1120 	{AINSL, ynone, Px, [23]uint8{0x6d}},
   1121 	{AINSW, ynone, Pe, [23]uint8{0x6d}},
   1122 	{AINT, yint, Px, [23]uint8{0xcd}},
   1123 	{AINTO, ynone, P32, [23]uint8{0xce}},
   1124 	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
   1125 	{AIRETL, ynone, Px, [23]uint8{0xcf}},
   1126 	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
   1127 	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
   1128 	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
   1129 	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
   1130 	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
   1131 	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
   1132 	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
   1133 	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
   1134 	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
   1135 	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
   1136 	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
   1137 	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
   1138 	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
   1139 	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
   1140 	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
   1141 	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
   1142 	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
   1143 	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
   1144 	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
   1145 	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
   1146 	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
   1147 	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
   1148 	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
   1149 	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
   1150 	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
   1151 	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
   1152 	{ALAHF, ynone, Px, [23]uint8{0x9f}},
   1153 	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
   1154 	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
   1155 	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
   1156 	{ALDMXCSR, ysvrs, Pm, [23]uint8{0xae, 02, 0xae, 02}},
   1157 	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
   1158 	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
   1159 	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
   1160 	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
   1161 	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
   1162 	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
   1163 	{ALOCK, ynone, Px, [23]uint8{0xf0}},
   1164 	{ALODSB, ynone, Pb, [23]uint8{0xac}},
   1165 	{ALODSL, ynone, Px, [23]uint8{0xad}},
   1166 	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
   1167 	{ALODSW, ynone, Pe, [23]uint8{0xad}},
   1168 	{ALONG, ybyte, Px, [23]uint8{4}},
   1169 	{ALOOP, yloop, Px, [23]uint8{0xe2}},
   1170 	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
   1171 	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
   1172 	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
   1173 	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
   1174 	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
   1175 	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
   1176 	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
   1177 	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
   1178 	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
   1179 	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
   1180 	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
   1181 	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
   1182 	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
   1183 	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
   1184 	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
   1185 	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
   1186 	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
   1187 	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
   1188 	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
   1189 	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
   1190 	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
   1191 	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
   1192 	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
   1193 	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
   1194 	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
   1195 	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
   1196 	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
   1197 	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
   1198 	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
   1199 	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
   1200 	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
   1201 	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
   1202 	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
   1203 	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
   1204 	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
   1205 	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
   1206 	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
   1207 	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
   1208 	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
   1209 	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
   1210 	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0x31, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
   1211 	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
   1212 	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
   1213 	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
   1214 	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
   1215 	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
   1216 	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
   1217 	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
   1218 	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
   1219 	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
   1220 	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0x31, 0xb8, 0xc7, 00, 0}},
   1221 	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
   1222 	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
   1223 	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
   1224 	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
   1225 	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
   1226 	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
   1227 	{AMULPD, yxm, Pe, [23]uint8{0x59}},
   1228 	{AMULPS, yxm, Ym, [23]uint8{0x59}},
   1229 	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
   1230 	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
   1231 	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
   1232 	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
   1233 	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
   1234 	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
   1235 	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
   1236 	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
   1237 	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
   1238 	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
   1239 	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
   1240 	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
   1241 	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
   1242 	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
   1243 	{AORL, yaddl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
   1244 	{AORPD, yxm, Pq, [23]uint8{0x56}},
   1245 	{AORPS, yxm, Pm, [23]uint8{0x56}},
   1246 	{AORQ, yaddl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
   1247 	{AORW, yaddl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
   1248 	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
   1249 	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
   1250 	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
   1251 	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
   1252 	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
   1253 	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
   1254 	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
   1255 	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
   1256 	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
   1257 	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
   1258 	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
   1259 	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
   1260 	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
   1261 	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
   1262 	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
   1263 	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
   1264 	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
   1265 	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
   1266 	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
   1267 	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
   1268 	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
   1269 	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
   1270 	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
   1271 	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
   1272 	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
   1273 	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
   1274 	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
   1275 	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
   1276 	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
   1277 	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
   1278 	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
   1279 	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
   1280 	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
   1281 	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
   1282 	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
   1283 	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
   1284 	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
   1285 	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
   1286 	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
   1287 	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
   1288 	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
   1289 	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
   1290 	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
   1291 	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
   1292 	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
   1293 	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
   1294 	{APMINSW, yxm, Pe, [23]uint8{0xea}},
   1295 	{APMINUB, yxm, Pe, [23]uint8{0xda}},
   1296 	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
   1297 	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
   1298 	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
   1299 	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
   1300 	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
   1301 	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
   1302 	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
   1303 	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
   1304 	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
   1305 	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
   1306 	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
   1307 	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
   1308 	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
   1309 	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
   1310 	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
   1311 	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
   1312 	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
   1313 	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
   1314 	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
   1315 	{APOPAL, ynone, P32, [23]uint8{0x61}},
   1316 	{APOPAW, ynone, Pe, [23]uint8{0x61}},
   1317 	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
   1318 	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
   1319 	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
   1320 	{APOPFL, ynone, P32, [23]uint8{0x9d}},
   1321 	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
   1322 	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
   1323 	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
   1324 	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
   1325 	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
   1326 	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
   1327 	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
   1328 	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
   1329 	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
   1330 	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
   1331 	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
   1332 	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
   1333 	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
   1334 	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
   1335 	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
   1336 	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
   1337 	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
   1338 	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
   1339 	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
   1340 	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
   1341 	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
   1342 	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
   1343 	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
   1344 	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
   1345 	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
   1346 	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
   1347 	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
   1348 	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
   1349 	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
   1350 	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
   1351 	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
   1352 	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
   1353 	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
   1354 	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
   1355 	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
   1356 	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
   1357 	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
   1358 	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
   1359 	{APUSHAL, ynone, P32, [23]uint8{0x60}},
   1360 	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
   1361 	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
   1362 	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
   1363 	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
   1364 	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
   1365 	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
   1366 	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
   1367 	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
   1368 	{AQUAD, ybyte, Px, [23]uint8{8}},
   1369 	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
   1370 	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
   1371 	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
   1372 	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
   1373 	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
   1374 	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
   1375 	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
   1376 	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
   1377 	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
   1378 	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
   1379 	{AREP, ynone, Px, [23]uint8{0xf3}},
   1380 	{AREPN, ynone, Px, [23]uint8{0xf2}},
   1381 	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
   1382 	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
   1383 	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
   1384 	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
   1385 	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
   1386 	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
   1387 	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
   1388 	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
   1389 	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
   1390 	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
   1391 	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
   1392 	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
   1393 	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
   1394 	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
   1395 	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
   1396 	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
   1397 	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1398 	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1399 	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1400 	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
   1401 	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
   1402 	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
   1403 	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
   1404 	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
   1405 	{ASBBL, yaddl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
   1406 	{ASBBQ, yaddl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
   1407 	{ASBBW, yaddl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
   1408 	{ASCASB, ynone, Pb, [23]uint8{0xae}},
   1409 	{ASCASL, ynone, Px, [23]uint8{0xaf}},
   1410 	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
   1411 	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
   1412 	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
   1413 	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
   1414 	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
   1415 	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
   1416 	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
   1417 	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
   1418 	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
   1419 	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
   1420 	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
   1421 	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
   1422 	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
   1423 	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
   1424 	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
   1425 	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
   1426 	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
   1427 	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
   1428 	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
   1429 	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1430 	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1431 	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1432 	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
   1433 	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
   1434 	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
   1435 	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
   1436 	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
   1437 	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
   1438 	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
   1439 	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
   1440 	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
   1441 	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
   1442 	{ASTC, ynone, Px, [23]uint8{0xf9}},
   1443 	{ASTD, ynone, Px, [23]uint8{0xfd}},
   1444 	{ASTI, ynone, Px, [23]uint8{0xfb}},
   1445 	{ASTMXCSR, ysvrs, Pm, [23]uint8{0xae, 03, 0xae, 03}},
   1446 	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
   1447 	{ASTOSL, ynone, Px, [23]uint8{0xab}},
   1448 	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
   1449 	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
   1450 	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
   1451 	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
   1452 	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
   1453 	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
   1454 	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
   1455 	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
   1456 	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
   1457 	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
   1458 	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
   1459 	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
   1460 	{ATESTB, yxorb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
   1461 	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
   1462 	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
   1463 	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
   1464 	{obj.ATEXT, ytext, Px, [23]uint8{}},
   1465 	{AUCOMISD, yxm, Pe, [23]uint8{0x2e}},
   1466 	{AUCOMISS, yxm, Pm, [23]uint8{0x2e}},
   1467 	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
   1468 	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
   1469 	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
   1470 	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
   1471 	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
   1472 	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
   1473 	{AWAIT, ynone, Px, [23]uint8{0x9b}},
   1474 	{AWORD, ybyte, Px, [23]uint8{2}},
   1475 	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
   1476 	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
   1477 	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
   1478 	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
   1479 	{AXLAT, ynone, Px, [23]uint8{0xd7}},
   1480 	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
   1481 	{AXORL, yaddl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
   1482 	{AXORPD, yxm, Pe, [23]uint8{0x57}},
   1483 	{AXORPS, yxm, Pm, [23]uint8{0x57}},
   1484 	{AXORQ, yaddl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
   1485 	{AXORW, yaddl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
   1486 	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
   1487 	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
   1488 	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
   1489 	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
   1490 	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
   1491 	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
   1492 	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
   1493 	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
   1494 	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
   1495 	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
   1496 	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
   1497 	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
   1498 	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
   1499 	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
   1500 	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
   1501 	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
   1502 	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
   1503 	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
   1504 	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
   1505 	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
   1506 	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
   1507 	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
   1508 	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
   1509 	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
   1510 	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
   1511 	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
   1512 	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
   1513 	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
   1514 	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
   1515 	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
   1516 	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
   1517 	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
   1518 	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
   1519 	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
   1520 	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
   1521 	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
   1522 	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
   1523 	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
   1524 	{AFADDDP, ycompp, Px, [23]uint8{0xde, 00}},
   1525 	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
   1526 	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
   1527 	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
   1528 	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
   1529 	{AFMULDP, ycompp, Px, [23]uint8{0xde, 01}},
   1530 	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
   1531 	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
   1532 	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
   1533 	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
   1534 	{AFSUBDP, ycompp, Px, [23]uint8{0xde, 05}},
   1535 	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
   1536 	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
   1537 	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
   1538 	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
   1539 	{AFSUBRDP, ycompp, Px, [23]uint8{0xde, 04}},
   1540 	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
   1541 	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
   1542 	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
   1543 	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
   1544 	{AFDIVDP, ycompp, Px, [23]uint8{0xde, 07}},
   1545 	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
   1546 	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
   1547 	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
   1548 	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
   1549 	{AFDIVRDP, ycompp, Px, [23]uint8{0xde, 06}},
   1550 	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
   1551 	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
   1552 	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
   1553 	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
   1554 	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
   1555 	{AFFREE, nil, 0, [23]uint8{}},
   1556 	{AFLDCW, ysvrs, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
   1557 	{AFLDENV, ysvrs, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
   1558 	{AFRSTOR, ysvrs, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
   1559 	{AFSAVE, ysvrs, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
   1560 	{AFSTCW, ysvrs, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
   1561 	{AFSTENV, ysvrs, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
   1562 	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
   1563 	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
   1564 	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
   1565 	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
   1566 	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
   1567 	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
   1568 	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
   1569 	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
   1570 	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
   1571 	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
   1572 	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
   1573 	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
   1574 	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
   1575 	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
   1576 	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
   1577 	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
   1578 	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
   1579 	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
   1580 	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
   1581 	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
   1582 	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
   1583 	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
   1584 	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
   1585 	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
   1586 	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
   1587 	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
   1588 	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
   1589 	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
   1590 	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
   1591 	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
   1592 	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
   1593 	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
   1594 	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
   1595 	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
   1596 	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
   1597 	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
   1598 	{AINVD, ynone, Pm, [23]uint8{0x08}},
   1599 	{AINVLPG, ydivb, Pm, [23]uint8{0x01, 07}},
   1600 	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
   1601 	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
   1602 	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
   1603 	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
   1604 	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
   1605 	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
   1606 	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
   1607 	{ARSM, ynone, Pm, [23]uint8{0xaa}},
   1608 	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
   1609 	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
   1610 	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
   1611 	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
   1612 	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
   1613 	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
   1614 	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
   1615 	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
   1616 	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
   1617 	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
   1618 	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
   1619 	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
   1620 	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
   1621 	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
   1622 	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
   1623 	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
   1624 	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
   1625 	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
   1626 	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
   1627 	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
   1628 	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
   1629 	{AAESKEYGENASSIST, yxshuf, Pq, [23]uint8{0x3a, 0xdf, 0}},
   1630 	{AROUNDPD, yxshuf, Pq, [23]uint8{0x3a, 0x09, 0}},
   1631 	{AROUNDPS, yxshuf, Pq, [23]uint8{0x3a, 0x08, 0}},
   1632 	{AROUNDSD, yxshuf, Pq, [23]uint8{0x3a, 0x0b, 0}},
   1633 	{AROUNDSS, yxshuf, Pq, [23]uint8{0x3a, 0x0a, 0}},
   1634 	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
   1635 	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
   1636 	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
   1637 	{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
   1638 	{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
   1639 	{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
   1640 
   1641 	{AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
   1642 	{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
   1643 	{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
   1644 	{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
   1645 	{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
   1646 	{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
   1647 	{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
   1648 	{AMULXQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF6}},
   1649 	{APDEPL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF5}},
   1650 	{APDEPQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF5}},
   1651 	{APEXTL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF5}},
   1652 	{APEXTQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF5}},
   1653 	{ASARXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W0, 0xF7}},
   1654 	{ASARXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F3_0F38_W1, 0xF7}},
   1655 	{ASHLXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W0, 0xF7}},
   1656 	{ASHLXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_66_0F38_W1, 0xF7}},
   1657 	{ASHRXL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF7}},
   1658 	{ASHRXQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W1, 0xF7}},
   1659 
   1660 	{AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
   1661 	{AVMOVDQU, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x6F, VEX_128_F3_0F_WIG, 0x7F, VEX_256_F3_0F_WIG, 0x6F, VEX_256_F3_0F_WIG, 0x7F}},
   1662 	{AVMOVDQA, yvex_vmovdqa, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x6F, VEX_128_66_0F_WIG, 0x7F, VEX_256_66_0F_WIG, 0x6F, VEX_256_66_0F_WIG, 0x7F}},
   1663 	{AVMOVNTDQ, yvex_vmovntdq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xE7, VEX_256_66_0F_WIG, 0xE7}},
   1664 	{AVPCMPEQB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x74, VEX_256_66_0F_WIG, 0x74}},
   1665 	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xEF, VEX_256_66_0F_WIG, 0xEF}},
   1666 	{AVPMOVMSKB, yvex_xyr2, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xD7, VEX_256_66_0F_WIG, 0xD7}},
   1667 	{AVPAND, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xDB, VEX_256_66_0F_WIG, 0xDB}},
   1668 	{AVPBROADCASTB, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x78, VEX_256_66_0F38_W0, 0x78}},
   1669 	{AVPTEST, yvex_xy2, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x17, VEX_256_66_0F38_WIG, 0x17}},
   1670 	{AVPSHUFB, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F38_WIG, 0x00, VEX_256_66_0F38_WIG, 0x00}},
   1671 	{AVPSHUFD, yvex_xyi3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70, VEX_128_66_0F_WIG, 0x70, VEX_256_66_0F_WIG, 0x70}},
   1672 	{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
   1673 	{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
   1674 	{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
   1675 	{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
   1676 	{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
   1677 	{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},
   1678 	{AVPSRLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd0, VEX_256_66_0F_WIG, 0x73, 0xd0, VEX_128_66_0F_WIG, 0xd3, VEX_256_66_0F_WIG, 0xd3}},
   1679 	{AVPSRLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xd8, VEX_256_66_0F_WIG, 0x73, 0xd8}},
   1680 	{AVPSLLDQ, yvex_shift_dq, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf8, VEX_256_66_0F_WIG, 0x73, 0xf8}},
   1681 	{AVPERM2F128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_W0, 0x06}},
   1682 	{AVPALIGNR, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x0f}},
   1683 	{AVPBLENDD, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x02}},
   1684 	{AVINSERTI128, yvex_xyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x38}},
   1685 	{AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}},
   1686 	{ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}},
   1687 	{ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}},
   1688 	{AVBROADCASTSD, yvex_vpbroadcast_sd, Pvex, [23]uint8{VEX_256_66_0F38_W0, 0x19}},
   1689 	{AVBROADCASTSS, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x18, VEX_256_66_0F38_W0, 0x18}},
   1690 	{AVMOVDDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x12, VEX_256_F2_0F_WIG, 0x12}},
   1691 	{AVMOVSHDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x16, VEX_256_F3_0F_WIG, 0x16}},
   1692 	{AVMOVSLDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x12, VEX_256_F3_0F_WIG, 0x12}},
   1693 
   1694 	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
   1695 	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
   1696 	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
   1697 	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
   1698 	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
   1699 	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
   1700 	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
   1701 	{obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
   1702 	{obj.ATYPE, nil, 0, [23]uint8{}},
   1703 	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
   1704 	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
   1705 	{obj.AVARDEF, nil, 0, [23]uint8{}},
   1706 	{obj.AVARKILL, nil, 0, [23]uint8{}},
   1707 	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
   1708 	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
   1709 	{obj.AEND, nil, 0, [23]uint8{}},
   1710 	{0, nil, 0, [23]uint8{}},
   1711 }
   1712 
// opindex maps an opcode, masked with obj.AMask, to its entry in optab.
// It is filled in by instinit.
var opindex [(ALAST + 1) & obj.AMask]*Optab
   1714 
   1715 // isextern reports whether s describes an external symbol that must avoid pc-relative addressing.
   1716 // This happens on systems like Solaris that call .so functions instead of system calls.
   1717 // It does not seem to be necessary for any other systems. This is probably working
   1718 // around a Solaris-specific bug that should be fixed differently, but we don't know
   1719 // what that bug is. And this does fix it.
   1720 func isextern(s *obj.LSym) bool {
   1721 	// All the Solaris dynamic imports from libc.so begin with "libc_".
   1722 	return strings.HasPrefix(s.Name, "libc_")
   1723 }
   1724 
   1725 // single-instruction no-ops of various lengths.
   1726 // constructed by hand and disassembled with gdb to verify.
   1727 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
   1728 var nop = [][16]uint8{
   1729 	{0x90},
   1730 	{0x66, 0x90},
   1731 	{0x0F, 0x1F, 0x00},
   1732 	{0x0F, 0x1F, 0x40, 0x00},
   1733 	{0x0F, 0x1F, 0x44, 0x00, 0x00},
   1734 	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
   1735 	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
   1736 	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   1737 	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   1738 }
   1739 
   1740 // Native Client rejects the repeated 0x66 prefix.
   1741 // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   1742 func fillnop(p []byte, n int) {
   1743 	var m int
   1744 
   1745 	for n > 0 {
   1746 		m = n
   1747 		if m > len(nop) {
   1748 			m = len(nop)
   1749 		}
   1750 		copy(p[:m], nop[m-1][:m])
   1751 		p = p[m:]
   1752 		n -= m
   1753 	}
   1754 }
   1755 
   1756 func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
   1757 	s.Grow(int64(c) + int64(pad))
   1758 	fillnop(s.P[c:], int(pad))
   1759 	return c + pad
   1760 }
   1761 
   1762 func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
   1763 	if p.Mode != 64 || ctxt.Arch.PtrSize == 4 {
   1764 		return l
   1765 	}
   1766 	return q
   1767 }
   1768 
   1769 func span6(ctxt *obj.Link, s *obj.LSym) {
   1770 	ctxt.Cursym = s
   1771 
   1772 	if s.P != nil {
   1773 		return
   1774 	}
   1775 
   1776 	if ycover[0] == 0 {
   1777 		instinit()
   1778 	}
   1779 
   1780 	for p := ctxt.Cursym.Text; p != nil; p = p.Link {
   1781 		if p.To.Type == obj.TYPE_BRANCH {
   1782 			if p.Pcond == nil {
   1783 				p.Pcond = p
   1784 			}
   1785 		}
   1786 		if p.As == AADJSP {
   1787 			p.To.Type = obj.TYPE_REG
   1788 			p.To.Reg = REG_SP
   1789 			v := int32(-p.From.Offset)
   1790 			p.From.Offset = int64(v)
   1791 			p.As = spadjop(ctxt, p, AADDL, AADDQ)
   1792 			if v < 0 {
   1793 				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
   1794 				v = -v
   1795 				p.From.Offset = int64(v)
   1796 			}
   1797 
   1798 			if v == 0 {
   1799 				p.As = obj.ANOP
   1800 			}
   1801 		}
   1802 	}
   1803 
   1804 	var q *obj.Prog
   1805 	var count int64 // rough count of number of instructions
   1806 	for p := s.Text; p != nil; p = p.Link {
   1807 		count++
   1808 		p.Back = 2 // use short branches first time through
   1809 		q = p.Pcond
   1810 		if q != nil && (q.Back&2 != 0) {
   1811 			p.Back |= 1 // backward jump
   1812 			q.Back |= 4 // loop head
   1813 		}
   1814 
   1815 		if p.As == AADJSP {
   1816 			p.To.Type = obj.TYPE_REG
   1817 			p.To.Reg = REG_SP
   1818 			v := int32(-p.From.Offset)
   1819 			p.From.Offset = int64(v)
   1820 			p.As = spadjop(ctxt, p, AADDL, AADDQ)
   1821 			if v < 0 {
   1822 				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
   1823 				v = -v
   1824 				p.From.Offset = int64(v)
   1825 			}
   1826 
   1827 			if v == 0 {
   1828 				p.As = obj.ANOP
   1829 			}
   1830 		}
   1831 	}
   1832 	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
   1833 
   1834 	n := 0
   1835 	var c int32
   1836 	errors := ctxt.Errors
   1837 	var deferreturn *obj.LSym
   1838 	if ctxt.Headtype == obj.Hnacl {
   1839 		deferreturn = obj.Linklookup(ctxt, "runtime.deferreturn", 0)
   1840 	}
   1841 	for {
   1842 		loop := int32(0)
   1843 		for i := range s.R {
   1844 			s.R[i] = obj.Reloc{}
   1845 		}
   1846 		s.R = s.R[:0]
   1847 		s.P = s.P[:0]
   1848 		c = 0
   1849 		for p := s.Text; p != nil; p = p.Link {
   1850 			if ctxt.Headtype == obj.Hnacl && p.Isize > 0 {
   1851 
   1852 				// pad everything to avoid crossing 32-byte boundary
   1853 				if c>>5 != (c+int32(p.Isize)-1)>>5 {
   1854 					c = naclpad(ctxt, s, c, -c&31)
   1855 				}
   1856 
   1857 				// pad call deferreturn to start at 32-byte boundary
   1858 				// so that subtracting 5 in jmpdefer will jump back
   1859 				// to that boundary and rerun the call.
   1860 				if p.As == obj.ACALL && p.To.Sym == deferreturn {
   1861 					c = naclpad(ctxt, s, c, -c&31)
   1862 				}
   1863 
   1864 				// pad call to end at 32-byte boundary
   1865 				if p.As == obj.ACALL {
   1866 					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
   1867 				}
   1868 
   1869 				// the linker treats REP and STOSQ as different instructions
   1870 				// but in fact the REP is a prefix on the STOSQ.
   1871 				// make sure REP has room for 2 more bytes, so that
   1872 				// padding will not be inserted before the next instruction.
   1873 				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
   1874 					c = naclpad(ctxt, s, c, -c&31)
   1875 				}
   1876 
   1877 				// same for LOCK.
   1878 				// various instructions follow; the longest is 4 bytes.
   1879 				// give ourselves 8 bytes so as to avoid surprises.
   1880 				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
   1881 					c = naclpad(ctxt, s, c, -c&31)
   1882 				}
   1883 			}
   1884 
   1885 			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
   1886 				// pad with NOPs
   1887 				v := -c & (LoopAlign - 1)
   1888 
   1889 				if v <= MaxLoopPad {
   1890 					s.Grow(int64(c) + int64(v))
   1891 					fillnop(s.P[c:], int(v))
   1892 					c += v
   1893 				}
   1894 			}
   1895 
   1896 			p.Pc = int64(c)
   1897 
   1898 			// process forward jumps to p
   1899 			for q = p.Rel; q != nil; q = q.Forwd {
   1900 				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
   1901 				if q.Back&2 != 0 { // short
   1902 					if v > 127 {
   1903 						loop++
   1904 						q.Back ^= 2
   1905 					}
   1906 
   1907 					if q.As == AJCXZL || q.As == AXBEGIN {
   1908 						s.P[q.Pc+2] = byte(v)
   1909 					} else {
   1910 						s.P[q.Pc+1] = byte(v)
   1911 					}
   1912 				} else {
   1913 					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
   1914 				}
   1915 			}
   1916 
   1917 			p.Rel = nil
   1918 
   1919 			p.Pc = int64(c)
   1920 			asmins(ctxt, p)
   1921 			m := ctxt.AsmBuf.Len()
   1922 			if int(p.Isize) != m {
   1923 				p.Isize = uint8(m)
   1924 				loop++
   1925 			}
   1926 
   1927 			s.Grow(p.Pc + int64(m))
   1928 			copy(s.P[p.Pc:], ctxt.AsmBuf.Bytes())
   1929 			c += int32(m)
   1930 		}
   1931 
   1932 		n++
   1933 		if n > 20 {
   1934 			ctxt.Diag("span must be looping")
   1935 			log.Fatalf("loop")
   1936 		}
   1937 		if loop == 0 {
   1938 			break
   1939 		}
   1940 		if ctxt.Errors > errors {
   1941 			return
   1942 		}
   1943 	}
   1944 
   1945 	if ctxt.Headtype == obj.Hnacl {
   1946 		c = naclpad(ctxt, s, c, -c&31)
   1947 	}
   1948 
   1949 	s.Size = int64(c)
   1950 
   1951 	if false { /* debug['a'] > 1 */
   1952 		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
   1953 		var i int
   1954 		for i = 0; i < len(s.P); i++ {
   1955 			fmt.Printf(" %.2x", s.P[i])
   1956 			if i%16 == 15 {
   1957 				fmt.Printf("\n  %.6x", uint(i+1))
   1958 			}
   1959 		}
   1960 
   1961 		if i%16 != 0 {
   1962 			fmt.Printf("\n")
   1963 		}
   1964 
   1965 		for i := 0; i < len(s.R); i++ {
   1966 			r := &s.R[i]
   1967 			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
   1968 		}
   1969 	}
   1970 }
   1971 
   1972 func instinit() {
   1973 	for i := 1; optab[i].as != 0; i++ {
   1974 		c := optab[i].as
   1975 		if opindex[c&obj.AMask] != nil {
   1976 			log.Fatalf("phase error in optab: %d (%v)", i, c)
   1977 		}
   1978 		opindex[c&obj.AMask] = &optab[i]
   1979 	}
   1980 
   1981 	for i := 0; i < Ymax; i++ {
   1982 		ycover[i*Ymax+i] = 1
   1983 	}
   1984 
   1985 	ycover[Yi0*Ymax+Yi8] = 1
   1986 	ycover[Yi1*Ymax+Yi8] = 1
   1987 	ycover[Yu7*Ymax+Yi8] = 1
   1988 
   1989 	ycover[Yi0*Ymax+Yu7] = 1
   1990 	ycover[Yi1*Ymax+Yu7] = 1
   1991 
   1992 	ycover[Yi0*Ymax+Yu8] = 1
   1993 	ycover[Yi1*Ymax+Yu8] = 1
   1994 	ycover[Yu7*Ymax+Yu8] = 1
   1995 
   1996 	ycover[Yi0*Ymax+Ys32] = 1
   1997 	ycover[Yi1*Ymax+Ys32] = 1
   1998 	ycover[Yu7*Ymax+Ys32] = 1
   1999 	ycover[Yu8*Ymax+Ys32] = 1
   2000 	ycover[Yi8*Ymax+Ys32] = 1
   2001 
   2002 	ycover[Yi0*Ymax+Yi32] = 1
   2003 	ycover[Yi1*Ymax+Yi32] = 1
   2004 	ycover[Yu7*Ymax+Yi32] = 1
   2005 	ycover[Yu8*Ymax+Yi32] = 1
   2006 	ycover[Yi8*Ymax+Yi32] = 1
   2007 	ycover[Ys32*Ymax+Yi32] = 1
   2008 
   2009 	ycover[Yi0*Ymax+Yi64] = 1
   2010 	ycover[Yi1*Ymax+Yi64] = 1
   2011 	ycover[Yu7*Ymax+Yi64] = 1
   2012 	ycover[Yu8*Ymax+Yi64] = 1
   2013 	ycover[Yi8*Ymax+Yi64] = 1
   2014 	ycover[Ys32*Ymax+Yi64] = 1
   2015 	ycover[Yi32*Ymax+Yi64] = 1
   2016 
   2017 	ycover[Yal*Ymax+Yrb] = 1
   2018 	ycover[Ycl*Ymax+Yrb] = 1
   2019 	ycover[Yax*Ymax+Yrb] = 1
   2020 	ycover[Ycx*Ymax+Yrb] = 1
   2021 	ycover[Yrx*Ymax+Yrb] = 1
   2022 	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
   2023 
   2024 	ycover[Ycl*Ymax+Ycx] = 1
   2025 
   2026 	ycover[Yax*Ymax+Yrx] = 1
   2027 	ycover[Ycx*Ymax+Yrx] = 1
   2028 
   2029 	ycover[Yax*Ymax+Yrl] = 1
   2030 	ycover[Ycx*Ymax+Yrl] = 1
   2031 	ycover[Yrx*Ymax+Yrl] = 1
   2032 	ycover[Yrl32*Ymax+Yrl] = 1
   2033 
   2034 	ycover[Yf0*Ymax+Yrf] = 1
   2035 
   2036 	ycover[Yal*Ymax+Ymb] = 1
   2037 	ycover[Ycl*Ymax+Ymb] = 1
   2038 	ycover[Yax*Ymax+Ymb] = 1
   2039 	ycover[Ycx*Ymax+Ymb] = 1
   2040 	ycover[Yrx*Ymax+Ymb] = 1
   2041 	ycover[Yrb*Ymax+Ymb] = 1
   2042 	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
   2043 	ycover[Ym*Ymax+Ymb] = 1
   2044 
   2045 	ycover[Yax*Ymax+Yml] = 1
   2046 	ycover[Ycx*Ymax+Yml] = 1
   2047 	ycover[Yrx*Ymax+Yml] = 1
   2048 	ycover[Yrl*Ymax+Yml] = 1
   2049 	ycover[Yrl32*Ymax+Yml] = 1
   2050 	ycover[Ym*Ymax+Yml] = 1
   2051 
   2052 	ycover[Yax*Ymax+Ymm] = 1
   2053 	ycover[Ycx*Ymax+Ymm] = 1
   2054 	ycover[Yrx*Ymax+Ymm] = 1
   2055 	ycover[Yrl*Ymax+Ymm] = 1
   2056 	ycover[Yrl32*Ymax+Ymm] = 1
   2057 	ycover[Ym*Ymax+Ymm] = 1
   2058 	ycover[Ymr*Ymax+Ymm] = 1
   2059 
   2060 	ycover[Ym*Ymax+Yxm] = 1
   2061 	ycover[Yxr*Ymax+Yxm] = 1
   2062 
   2063 	ycover[Ym*Ymax+Yym] = 1
   2064 	ycover[Yyr*Ymax+Yym] = 1
   2065 
   2066 	for i := 0; i < MAXREG; i++ {
   2067 		reg[i] = -1
   2068 		if i >= REG_AL && i <= REG_R15B {
   2069 			reg[i] = (i - REG_AL) & 7
   2070 			if i >= REG_SPB && i <= REG_DIB {
   2071 				regrex[i] = 0x40
   2072 			}
   2073 			if i >= REG_R8B && i <= REG_R15B {
   2074 				regrex[i] = Rxr | Rxx | Rxb
   2075 			}
   2076 		}
   2077 
   2078 		if i >= REG_AH && i <= REG_BH {
   2079 			reg[i] = 4 + ((i - REG_AH) & 7)
   2080 		}
   2081 		if i >= REG_AX && i <= REG_R15 {
   2082 			reg[i] = (i - REG_AX) & 7
   2083 			if i >= REG_R8 {
   2084 				regrex[i] = Rxr | Rxx | Rxb
   2085 			}
   2086 		}
   2087 
   2088 		if i >= REG_F0 && i <= REG_F0+7 {
   2089 			reg[i] = (i - REG_F0) & 7
   2090 		}
   2091 		if i >= REG_M0 && i <= REG_M0+7 {
   2092 			reg[i] = (i - REG_M0) & 7
   2093 		}
   2094 		if i >= REG_X0 && i <= REG_X0+15 {
   2095 			reg[i] = (i - REG_X0) & 7
   2096 			if i >= REG_X0+8 {
   2097 				regrex[i] = Rxr | Rxx | Rxb
   2098 			}
   2099 		}
   2100 		if i >= REG_Y0 && i <= REG_Y0+15 {
   2101 			reg[i] = (i - REG_Y0) & 7
   2102 			if i >= REG_Y0+8 {
   2103 				regrex[i] = Rxr | Rxx | Rxb
   2104 			}
   2105 		}
   2106 
   2107 		if i >= REG_CR+8 && i <= REG_CR+15 {
   2108 			regrex[i] = Rxr
   2109 		}
   2110 	}
   2111 }
   2112 
// isAndroid records whether obj.GOOS is "android". prefixof consults it when
// choosing the segment prefix for TLS references.
var isAndroid = (obj.GOOS == "android")
   2114 
// prefixof returns the segment-override prefix byte to emit for operand a of
// instruction p, or 0 when no prefix is needed.
//
// A memory operand with no symbol name whose base register is a segment
// register (or REG_TLS) gets that segment's prefix. Otherwise the index
// register is examined: in 32-bit mode only a REG_TLS index in a shared build
// produces a prefix; in other modes a segment register or REG_TLS index does.
// An unsupported TLS configuration is a fatal error.
func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
		return 0
	}
	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
		switch a.Reg {
		case REG_CS:
			return 0x2e

		case REG_DS:
			return 0x3e

		case REG_ES:
			return 0x26

		case REG_FS:
			return 0x64

		case REG_GS:
			return 0x65

		case REG_TLS:
			// NOTE: Systems listed here should be only systems that
			// support direct TLS references like 8(TLS) implemented as
			// direct references from FS or GS. Systems that require
			// the initial-exec model, where you load the TLS base into
			// a register and then index from that register, do not reach
			// this code and should not be listed.
			if p.Mode == 32 {
				switch ctxt.Headtype {
				default:
					if isAndroid {
						return 0x65 // GS
					}
					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)

				case obj.Hdarwin,
					obj.Hdragonfly,
					obj.Hfreebsd,
					obj.Hnetbsd,
					obj.Hopenbsd:
					return 0x65 // GS
				}
			}

			// Not 32-bit mode: choose FS or GS by target OS.
			switch ctxt.Headtype {
			default:
				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)

			case obj.Hlinux:
				if isAndroid {
					return 0x64 // FS
				}

				if ctxt.Flag_shared {
					log.Fatalf("unknown TLS base register for linux with -shared")
				} else {
					return 0x64 // FS
				}

			case obj.Hdragonfly,
				obj.Hfreebsd,
				obj.Hnetbsd,
				obj.Hopenbsd,
				obj.Hsolaris:
				return 0x64 // FS

			case obj.Hdarwin:
				return 0x65 // GS
			}
		}
	}

	// No prefix came from the base register; look at the index register.
	if p.Mode == 32 {
		if a.Index == REG_TLS && ctxt.Flag_shared {
			// When building for inclusion into a shared library, an instruction of the form
			//     MOVL 0(CX)(TLS*1), AX
			// becomes
			//     mov %gs:(%ecx), %eax
			// which assumes that the correct TLS offset has been loaded into %ecx (today
			// there is only one TLS variable -- g -- so this is OK). When not building for
			// a shared library the instruction it becomes
			//     mov 0x0(%ecx), $eax
			// and a R_TLS_LE relocation, and so does not require a prefix.
			if a.Offset != 0 {
				ctxt.Diag("cannot handle non-0 offsets to TLS")
			}
			return 0x65 // GS
		}
		return 0
	}

	switch a.Index {
	case REG_CS:
		return 0x2e

	case REG_DS:
		return 0x3e

	case REG_ES:
		return 0x26

	case REG_TLS:
		if ctxt.Flag_shared {
			// When building for inclusion into a shared library, an instruction of the form
			//     MOV 0(CX)(TLS*1), AX
			// becomes
			//     mov %fs:(%rcx), %rax
			// which assumes that the correct TLS offset has been loaded into %rcx (today
			// there is only one TLS variable -- g -- so this is OK). When not building for
			// a shared library the instruction does not require a prefix.
			if a.Offset != 0 {
				log.Fatalf("cannot handle non-0 offsets to TLS")
			}
			return 0x64
		}

	case REG_FS:
		return 0x64

	case REG_GS:
		return 0x65
	}

	return 0
}
   2241 
// oclass classifies operand a of instruction p and returns its operand class,
// one of the Y* constants. It returns Yxxx for an operand it does not accept;
// some of those cases also report a diagnostic through ctxt.Diag.
//
// Non-register operands are classified by a.Type (memory, address, constant,
// and so on); register operands are classified by a.Reg.
func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
	switch a.Type {
	case obj.TYPE_NONE:
		return Ynone

	case obj.TYPE_BRANCH:
		return Ybr

	case obj.TYPE_INDIR:
		// Only a named operand with no base, index or scale is accepted.
		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
			return Yindir
		}
		return Yxxx

	case obj.TYPE_MEM:
		if a.Index == REG_SP {
			// Can't use SP as the index register
			return Yxxx
		}
		if ctxt.Asmode == 64 {
			switch a.Name {
			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
				// Global variables can't use index registers and their
				// base register is %rip (%rip is encoded as REG_NONE).
				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
					return Yxxx
				}
			case obj.NAME_AUTO, obj.NAME_PARAM:
				// These names must have a base of SP.  The old compiler
				// uses 0 for the base register. SSA uses REG_SP.
				if a.Reg != REG_SP && a.Reg != 0 {
					return Yxxx
				}
			case obj.NAME_NONE:
				// everything is ok
			default:
				// unknown name
				return Yxxx
			}
		}
		return Ym

	case obj.TYPE_ADDR:
		switch a.Name {
		case obj.NAME_GOTREF:
			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
			return Yxxx

		case obj.NAME_EXTERN,
			obj.NAME_STATIC:
			// Symbols flagged by isextern, and all symbols in non-shared
			// 32-bit code, are classified as a 32-bit immediate.
			if a.Sym != nil && isextern(a.Sym) || (p.Mode == 32 && !ctxt.Flag_shared) {
				return Yi32
			}
			return Yiauto // use pc-relative addressing

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			return Yiauto
		}

		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
		// and got Yi32 in an earlier version of this code.
		// Keep doing that until we fix yduff etc.
		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
			return Yi32
		}

		if a.Sym != nil || a.Name != obj.NAME_NONE {
			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
		}
		fallthrough

		// An address with no symbol and no name is classified like a
		// constant.

	case obj.TYPE_CONST:
		if a.Sym != nil {
			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
		}

		// Classify the value by the narrowest class that holds it.
		v := a.Offset
		if p.Mode == 32 {
			// Keep only the low 32 bits, sign-extended.
			v = int64(int32(v))
		}
		if v == 0 {
			if p.Mark&PRESERVEFLAGS != 0 {
				// If PRESERVEFLAGS is set, avoid MOV $0, AX turning into XOR AX, AX.
				return Yu7
			}
			return Yi0
		}
		if v == 1 {
			return Yi1
		}
		if v >= 0 && v <= 127 {
			return Yu7
		}
		if v >= 0 && v <= 255 {
			return Yu8
		}
		if v >= -128 && v <= 127 {
			return Yi8
		}
		if p.Mode == 32 {
			return Yi32
		}
		l := int32(v)
		if int64(l) == v {
			return Ys32 /* can sign extend */
		}
		if v>>32 == 0 {
			return Yi32 /* unsigned */
		}
		return Yi64

	case obj.TYPE_TEXTSIZE:
		return Ytextsize
	}

	// Every remaining valid operand is a register.
	if a.Type != obj.TYPE_REG {
		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
		return Yxxx
	}

	switch a.Reg {
	case REG_AL:
		return Yal

	case REG_AX:
		return Yax

		/*
			case REG_SPB:
		*/
	// These byte registers are accepted only when ctxt.Asmode is 64.
	case REG_BPB,
		REG_SIB,
		REG_DIB,
		REG_R8B,
		REG_R9B,
		REG_R10B,
		REG_R11B,
		REG_R12B,
		REG_R13B,
		REG_R14B,
		REG_R15B:
		if ctxt.Asmode != 64 {
			return Yxxx
		}
		fallthrough

	case REG_DL,
		REG_BL,
		REG_AH,
		REG_CH,
		REG_DH,
		REG_BH:
		return Yrb

	case REG_CL:
		return Ycl

	case REG_CX:
		return Ycx

	case REG_DX, REG_BX:
		return Yrx

	// R8 through R15 are accepted only when ctxt.Asmode is 64.
	case REG_R8, /* not really Yrl */
		REG_R9,
		REG_R10,
		REG_R11,
		REG_R12,
		REG_R13,
		REG_R14,
		REG_R15:
		if ctxt.Asmode != 64 {
			return Yxxx
		}
		fallthrough

	case REG_SP, REG_BP, REG_SI, REG_DI:
		// In 32-bit mode these registers get the separate class Yrl32.
		if p.Mode == 32 {
			return Yrl32
		}
		return Yrl

	case REG_F0 + 0:
		return Yf0

	case REG_F0 + 1,
		REG_F0 + 2,
		REG_F0 + 3,
		REG_F0 + 4,
		REG_F0 + 5,
		REG_F0 + 6,
		REG_F0 + 7:
		return Yrf

	case REG_M0 + 0,
		REG_M0 + 1,
		REG_M0 + 2,
		REG_M0 + 3,
		REG_M0 + 4,
		REG_M0 + 5,
		REG_M0 + 6,
		REG_M0 + 7:
		return Ymr

	case REG_X0 + 0,
		REG_X0 + 1,
		REG_X0 + 2,
		REG_X0 + 3,
		REG_X0 + 4,
		REG_X0 + 5,
		REG_X0 + 6,
		REG_X0 + 7,
		REG_X0 + 8,
		REG_X0 + 9,
		REG_X0 + 10,
		REG_X0 + 11,
		REG_X0 + 12,
		REG_X0 + 13,
		REG_X0 + 14,
		REG_X0 + 15:
		return Yxr

	case REG_Y0 + 0,
		REG_Y0 + 1,
		REG_Y0 + 2,
		REG_Y0 + 3,
		REG_Y0 + 4,
		REG_Y0 + 5,
		REG_Y0 + 6,
		REG_Y0 + 7,
		REG_Y0 + 8,
		REG_Y0 + 9,
		REG_Y0 + 10,
		REG_Y0 + 11,
		REG_Y0 + 12,
		REG_Y0 + 13,
		REG_Y0 + 14,
		REG_Y0 + 15:
		return Yyr

	case REG_CS:
		return Ycs
	case REG_SS:
		return Yss
	case REG_DS:
		return Yds
	case REG_ES:
		return Yes
	case REG_FS:
		return Yfs
	case REG_GS:
		return Ygs
	case REG_TLS:
		return Ytls

	case REG_GDTR:
		return Ygdtr
	case REG_IDTR:
		return Yidtr
	case REG_LDTR:
		return Yldtr
	case REG_MSW:
		return Ymsw
	case REG_TASK:
		return Ytask

	case REG_CR + 0:
		return Ycr0
	case REG_CR + 1:
		return Ycr1
	case REG_CR + 2:
		return Ycr2
	case REG_CR + 3:
		return Ycr3
	case REG_CR + 4:
		return Ycr4
	case REG_CR + 5:
		return Ycr5
	case REG_CR + 6:
		return Ycr6
	case REG_CR + 7:
		return Ycr7
	case REG_CR + 8:
		return Ycr8

	case REG_DR + 0:
		return Ydr0
	case REG_DR + 1:
		return Ydr1
	case REG_DR + 2:
		return Ydr2
	case REG_DR + 3:
		return Ydr3
	case REG_DR + 4:
		return Ydr4
	case REG_DR + 5:
		return Ydr5
	case REG_DR + 6:
		return Ydr6
	case REG_DR + 7:
		return Ydr7

	case REG_TR + 0:
		return Ytr0
	case REG_TR + 1:
		return Ytr1
	case REG_TR + 2:
		return Ytr2
	case REG_TR + 3:
		return Ytr3
	case REG_TR + 4:
		return Ytr4
	case REG_TR + 5:
		return Ytr5
	case REG_TR + 6:
		return Ytr6
	case REG_TR + 7:
		return Ytr7
	}

	return Yxxx
}
   2567 
   2568 func asmidx(ctxt *obj.Link, scale int, index int, base int) {
   2569 	var i int
   2570 
   2571 	switch index {
   2572 	default:
   2573 		goto bad
   2574 
   2575 	case REG_NONE:
   2576 		i = 4 << 3
   2577 		goto bas
   2578 
   2579 	case REG_R8,
   2580 		REG_R9,
   2581 		REG_R10,
   2582 		REG_R11,
   2583 		REG_R12,
   2584 		REG_R13,
   2585 		REG_R14,
   2586 		REG_R15:
   2587 		if ctxt.Asmode != 64 {
   2588 			goto bad
   2589 		}
   2590 		fallthrough
   2591 
   2592 	case REG_AX,
   2593 		REG_CX,
   2594 		REG_DX,
   2595 		REG_BX,
   2596 		REG_BP,
   2597 		REG_SI,
   2598 		REG_DI:
   2599 		i = reg[index] << 3
   2600 	}
   2601 
   2602 	switch scale {
   2603 	default:
   2604 		goto bad
   2605 
   2606 	case 1:
   2607 		break
   2608 
   2609 	case 2:
   2610 		i |= 1 << 6
   2611 
   2612 	case 4:
   2613 		i |= 2 << 6
   2614 
   2615 	case 8:
   2616 		i |= 3 << 6
   2617 	}
   2618 
   2619 bas:
   2620 	switch base {
   2621 	default:
   2622 		goto bad
   2623 
   2624 	case REG_NONE: /* must be mod=00 */
   2625 		i |= 5
   2626 
   2627 	case REG_R8,
   2628 		REG_R9,
   2629 		REG_R10,
   2630 		REG_R11,
   2631 		REG_R12,
   2632 		REG_R13,
   2633 		REG_R14,
   2634 		REG_R15:
   2635 		if ctxt.Asmode != 64 {
   2636 			goto bad
   2637 		}
   2638 		fallthrough
   2639 
   2640 	case REG_AX,
   2641 		REG_CX,
   2642 		REG_DX,
   2643 		REG_BX,
   2644 		REG_SP,
   2645 		REG_BP,
   2646 		REG_SI,
   2647 		REG_DI:
   2648 		i |= reg[base]
   2649 	}
   2650 
   2651 	ctxt.AsmBuf.Put1(byte(i))
   2652 	return
   2653 
   2654 bad:
   2655 	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
   2656 	ctxt.AsmBuf.Put1(0)
   2657 	return
   2658 }
   2659 
   2660 func relput4(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) {
   2661 	var rel obj.Reloc
   2662 
   2663 	v := vaddr(ctxt, p, a, &rel)
   2664 	if rel.Siz != 0 {
   2665 		if rel.Siz != 4 {
   2666 			ctxt.Diag("bad reloc")
   2667 		}
   2668 		r := obj.Addrel(ctxt.Cursym)
   2669 		*r = rel
   2670 		r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
   2671 	}
   2672 
   2673 	ctxt.AsmBuf.PutInt32(int32(v))
   2674 }
   2675 
   2676 /*
   2677 static void
   2678 relput8(Prog *p, Addr *a)
   2679 {
   2680 	vlong v;
   2681 	Reloc rel, *r;
   2682 
   2683 	v = vaddr(ctxt, p, a, &rel);
   2684 	if(rel.siz != 0) {
   2685 		r = addrel(ctxt->cursym);
   2686 		*r = rel;
   2687 		r->siz = 8;
   2688 		r->off = p->pc + ctxt->andptr - ctxt->and;
   2689 	}
   2690 	put8(ctxt, v);
   2691 }
   2692 */
   2693 func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
   2694 	if r != nil {
   2695 		*r = obj.Reloc{}
   2696 	}
   2697 
   2698 	switch a.Name {
   2699 	case obj.NAME_STATIC,
   2700 		obj.NAME_GOTREF,
   2701 		obj.NAME_EXTERN:
   2702 		s := a.Sym
   2703 		if r == nil {
   2704 			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
   2705 			log.Fatalf("reloc")
   2706 		}
   2707 
   2708 		if a.Name == obj.NAME_GOTREF {
   2709 			r.Siz = 4
   2710 			r.Type = obj.R_GOTPCREL
   2711 		} else if isextern(s) || (p.Mode != 64 && !ctxt.Flag_shared) {
   2712 			r.Siz = 4
   2713 			r.Type = obj.R_ADDR
   2714 		} else {
   2715 			r.Siz = 4
   2716 			r.Type = obj.R_PCREL
   2717 		}
   2718 
   2719 		r.Off = -1 // caller must fill in
   2720 		r.Sym = s
   2721 		r.Add = a.Offset
   2722 
   2723 		return 0
   2724 	}
   2725 
   2726 	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
   2727 		if r == nil {
   2728 			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
   2729 			log.Fatalf("reloc")
   2730 		}
   2731 
   2732 		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == obj.Hdarwin {
   2733 			r.Type = obj.R_TLS_LE
   2734 			r.Siz = 4
   2735 			r.Off = -1 // caller must fill in
   2736 			r.Add = a.Offset
   2737 		}
   2738 		return 0
   2739 	}
   2740 
   2741 	return a.Offset
   2742 }
   2743 
   2744 func asmandsz(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
        	// asmandsz encodes operand a into ctxt.AsmBuf as a ModR/M byte,
        	// followed by a SIB byte and/or displacement when the addressing
        	// form needs them.
        	//
        	// r is the value placed in bits 3-5 of the ModR/M byte. rex is
        	// masked below and ORed into ctxt.Rexflag together with the bits
        	// required by the registers of a. When vaddr asks for a relocation
        	// it is recorded on ctxt.Cursym at the position of the 32-bit
        	// displacement. m64 is not referenced anywhere in this body.
        	//
        	// Operands that cannot be encoded are reported through ctxt.Diag
        	// at the "bad" label; nothing further is emitted in that case.
   2745 	var base int
   2746 	var rel obj.Reloc
   2747 
        	// Of the caller's bits, keep only 0x40 and Rxr.
   2748 	rex &= 0x40 | Rxr
   2749 	switch {
   2750 	case int64(int32(a.Offset)) == a.Offset:
   2751 		// Offset fits in sign-extended 32 bits.
   2752 	case int64(uint32(a.Offset)) == a.Offset && ctxt.Rexflag&Rxw == 0:
   2753 		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
   2754 		// This is allowed for assembly that wants to use 32-bit hex
   2755 		// constants, e.g. LEAL 0x99999999(AX), AX.
   2756 	default:
   2757 		ctxt.Diag("offset too large in %s", p)
   2758 	}
   2759 	v := int32(a.Offset)
   2760 	rel.Siz = 0
   2761 
   2762 	switch a.Type {
   2763 	case obj.TYPE_ADDR:
        		// TYPE_ADDR is never encodable here: the two checks only add a
        		// more specific diagnostic before the generic one at "bad".
   2764 		if a.Name == obj.NAME_NONE {
   2765 			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
   2766 		}
   2767 		if a.Index == REG_TLS {
   2768 			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
   2769 		}
   2770 		goto bad
   2771 
   2772 	case obj.TYPE_REG:
        		// Register-direct operand: mod=3, no displacement allowed.
   2773 		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
   2774 			goto bad
   2775 		}
   2776 		if v != 0 {
   2777 			goto bad
   2778 		}
   2779 		ctxt.AsmBuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
   2780 		ctxt.Rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
   2781 		return
   2782 	}
   2783 
   2784 	if a.Type != obj.TYPE_MEM {
   2785 		goto bad
   2786 	}
   2787 
        	// Memory operand with a real index register: always uses a SIB byte
        	// (rm field = 4), produced by asmidx.
   2788 	if a.Index != REG_NONE && a.Index != REG_TLS {
        		// Note: this base shadows the function-level base declared above.
   2789 		base := int(a.Reg)
   2790 		switch a.Name {
   2791 		case obj.NAME_EXTERN,
   2792 			obj.NAME_GOTREF,
   2793 			obj.NAME_STATIC:
   2794 			if !isextern(a.Sym) && p.Mode == 64 {
   2795 				goto bad
   2796 			}
   2797 			if p.Mode == 32 && ctxt.Flag_shared {
   2798 				// The base register has already been set. It holds the PC
   2799 				// of this instruction returned by a PC-reading thunk.
   2800 				// See obj6.go:rewriteToPcrel.
   2801 			} else {
   2802 				base = REG_NONE
   2803 			}
   2804 			v = int32(vaddr(ctxt, p, a, &rel))
   2805 
   2806 		case obj.NAME_AUTO,
   2807 			obj.NAME_PARAM:
   2808 			base = REG_SP
   2809 		}
   2810 
   2811 		ctxt.Rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
        		// No base register: mod=00 followed by a 32-bit displacement.
   2812 		if base == REG_NONE {
   2813 			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
   2814 			asmidx(ctxt, int(a.Scale), int(a.Index), base)
   2815 			goto putrelv
   2816 		}
   2817 
        		// Pick the shortest displacement form: none (mod=00), 8-bit
        		// (mod=01) or 32-bit (mod=10). BP and R13 as base never take
        		// the displacement-free form.
   2818 		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
   2819 			ctxt.AsmBuf.Put1(byte(0<<6 | 4<<0 | r<<3))
   2820 			asmidx(ctxt, int(a.Scale), int(a.Index), base)
   2821 			return
   2822 		}
   2823 
   2824 		if v >= -128 && v < 128 && rel.Siz == 0 {
   2825 			ctxt.AsmBuf.Put1(byte(1<<6 | 4<<0 | r<<3))
   2826 			asmidx(ctxt, int(a.Scale), int(a.Index), base)
   2827 			ctxt.AsmBuf.Put1(byte(v))
   2828 			return
   2829 		}
   2830 
   2831 		ctxt.AsmBuf.Put1(byte(2<<6 | 4<<0 | r<<3))
   2832 		asmidx(ctxt, int(a.Scale), int(a.Index), base)
   2833 		goto putrelv
   2834 	}
   2835 
        	// Memory operand without an index register (or with the REG_TLS
        	// pseudo-index).
   2836 	base = int(a.Reg)
   2837 	switch a.Name {
   2838 	case obj.NAME_STATIC,
   2839 		obj.NAME_GOTREF,
   2840 		obj.NAME_EXTERN:
   2841 		if a.Sym == nil {
   2842 			ctxt.Diag("bad addr: %v", p)
   2843 		}
   2844 		if p.Mode == 32 && ctxt.Flag_shared {
   2845 			// The base register has already been set. It holds the PC
   2846 			// of this instruction returned by a PC-reading thunk.
   2847 			// See obj6.go:rewriteToPcrel.
   2848 		} else {
   2849 			base = REG_NONE
   2850 		}
   2851 		v = int32(vaddr(ctxt, p, a, &rel))
   2852 
   2853 	case obj.NAME_AUTO,
   2854 		obj.NAME_PARAM:
   2855 		base = REG_SP
   2856 	}
   2857 
   2858 	if base == REG_TLS {
   2859 		v = int32(vaddr(ctxt, p, a, &rel))
   2860 	}
   2861 
   2862 	ctxt.Rexflag |= regrex[base]&Rxb | rex
        	// Absolute / segment-relative / TLS-relative address: only a 32-bit
        	// displacement follows, either directly after the ModR/M byte
        	// (rm=5) or after a SIB byte with no base and no index.
   2863 	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
   2864 		if (a.Sym == nil || !isextern(a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || p.Mode != 64 {
   2865 			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
   2866 				ctxt.Diag("%v has offset against gotref", p)
   2867 			}
   2868 			ctxt.AsmBuf.Put1(byte(0<<6 | 5<<0 | r<<3))
   2869 			goto putrelv
   2870 		}
   2871 
   2872 		// temporary
   2873 		ctxt.AsmBuf.Put2(
   2874 			byte(0<<6|4<<0|r<<3), // sib present
   2875 			0<<6|4<<3|5<<0,       // DS:d32
   2876 		)
   2877 		goto putrelv
   2878 	}
   2879 
        	// SP and R12 as base always go through a SIB byte (asmidx is called
        	// with REG_NONE as index), again with the shortest displacement.
   2880 	if base == REG_SP || base == REG_R12 {
   2881 		if v == 0 {
   2882 			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
   2883 			asmidx(ctxt, int(a.Scale), REG_NONE, base)
   2884 			return
   2885 		}
   2886 
   2887 		if v >= -128 && v < 128 {
   2888 			ctxt.AsmBuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
   2889 			asmidx(ctxt, int(a.Scale), REG_NONE, base)
   2890 			ctxt.AsmBuf.Put1(byte(v))
   2891 			return
   2892 		}
   2893 
   2894 		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
   2895 		asmidx(ctxt, int(a.Scale), REG_NONE, base)
   2896 		goto putrelv
   2897 	}
   2898 
        	// Any other general register as base: plain ModR/M, no SIB.
   2899 	if REG_AX <= base && base <= REG_R15 {
        		// With the REG_TLS pseudo-index in a non-shared build the
        		// offset becomes the addend of an R_TLS_LE relocation and the
        		// encoded displacement is zeroed.
   2900 		if a.Index == REG_TLS && !ctxt.Flag_shared {
   2901 			rel = obj.Reloc{}
   2902 			rel.Type = obj.R_TLS_LE
   2903 			rel.Siz = 4
   2904 			rel.Sym = nil
   2905 			rel.Add = int64(v)
   2906 			v = 0
   2907 		}
   2908 
   2909 		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
   2910 			ctxt.AsmBuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
   2911 			return
   2912 		}
   2913 
   2914 		if v >= -128 && v < 128 && rel.Siz == 0 {
   2915 			ctxt.AsmBuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
   2916 			return
   2917 		}
   2918 
   2919 		ctxt.AsmBuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
   2920 		goto putrelv
   2921 	}
   2922 
   2923 	goto bad
   2924 
        	// putrelv: emit the 32-bit displacement v, first recording rel (if
        	// any) at the buffer position where those four bytes will land.
   2925 putrelv:
   2926 	if rel.Siz != 0 {
   2927 		if rel.Siz != 4 {
   2928 			ctxt.Diag("bad rel")
   2929 			goto bad
   2930 		}
   2931 
        		// This r is a *obj.Reloc that shadows the int parameter r.
   2932 		r := obj.Addrel(ctxt.Cursym)
   2933 		*r = rel
   2934 		r.Off = int32(ctxt.Curp.Pc + int64(ctxt.AsmBuf.Len()))
   2935 	}
   2936 
   2937 	ctxt.AsmBuf.PutInt32(v)
   2938 	return
   2939 
        	// bad: common exit for every operand that cannot be encoded.
   2940 bad:
   2941 	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
   2942 	return
   2943 }
   2944 
   2945 func asmand(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
   2946 	asmandsz(ctxt, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
   2947 }
   2948 
   2949 func asmando(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, o int) {
   2950 	asmandsz(ctxt, p, a, o, 0, 0)
   2951 }
   2952 
   2953 func bytereg(a *obj.Addr, t *uint8) {
   2954 	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
   2955 		a.Reg += REG_AL - REG_AX
   2956 		*t = 0
   2957 	}
   2958 }
   2959 
   2960 func unbytereg(a *obj.Addr, t *uint8) {
   2961 	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
   2962 		a.Reg += REG_AX - REG_AL
   2963 		*t = 0
   2964 	}
   2965 }
   2966 
   2967 const (
   2968 	E = 0xff
   2969 )
   2970 
   2971 var ymovtab = []Movtab{
        	// ymovtab lists special-case move-like encodings (segment, control,
        	// debug, test and descriptor-table registers, double shifts, TLS
        	// base loads).
        	//
        	// Each row is a positional Movtab literal: an instruction, three
        	// operand classes (Y* constants), a small integer code, and four
        	// bytes of opcode data. In the push/pop rows the opcode bytes are
        	// followed by E.
        	// NOTE(review): the Movtab field names and the meaning of each code
        	// value are defined outside this chunk; presumably the three
        	// classes are from / from3 / to and the code selects how the opcode
        	// bytes are emitted. Confirm against the Movtab type and its
        	// consumer.
        	//
        	// The final all-zero row looks like a terminating sentinel; confirm
        	// before reordering or appending rows.
   2972 	/* push */
   2973 	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
   2974 	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
   2975 	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
   2976 	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
   2977 	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
   2978 	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
   2979 	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
   2980 	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
   2981 	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
   2982 	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
   2983 	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
   2984 	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
   2985 	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
   2986 	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},
   2987 
   2988 	/* pop */
   2989 	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
   2990 	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
   2991 	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
   2992 	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
   2993 	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
   2994 	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
   2995 	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
   2996 	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
   2997 	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
   2998 	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
   2999 	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
   3000 	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},
   3001 
   3002 	/* mov seg */
   3003 	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
   3004 	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
   3005 	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
   3006 	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
   3007 	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
   3008 	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
   3009 	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
   3010 	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
   3011 	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
   3012 	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
   3013 	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
   3014 	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},
   3015 
   3016 	/* mov cr */
   3017 	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
   3018 	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
   3019 	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
   3020 	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
   3021 	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
   3022 	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
   3023 	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
   3024 	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
   3025 	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
   3026 	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
   3027 	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
   3028 	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
   3029 	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
   3030 	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
   3031 	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
   3032 	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
   3033 	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
   3034 	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
   3035 	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
   3036 	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
   3037 
   3038 	/* mov dr */
   3039 	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
   3040 	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
   3041 	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
   3042 	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
   3043 	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
   3044 	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
   3045 	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
   3046 	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
   3047 	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
   3048 	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
   3049 	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
   3050 	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
   3051 
   3052 	/* mov tr */
   3053 	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
   3054 	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
   3055 	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
   3056 	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},
   3057 
   3058 	/* lgdt, sgdt, lidt, sidt */
   3059 	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
   3060 	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
   3061 	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
   3062 	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
   3063 	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
   3064 	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
   3065 	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
   3066 	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
   3067 
   3068 	/* lldt, sldt */
   3069 	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
   3070 	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},
   3071 
   3072 	/* lmsw, smsw */
   3073 	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
   3074 	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},
   3075 
   3076 	/* ltr, str */
   3077 	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
   3078 	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},
   3079 
   3080 	/* load full pointer - unsupported
   3081 	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
   3082 	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
   3083 	*/
   3084 
   3085 	/* double shift */
   3086 	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
   3087 	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
   3088 	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
   3089 	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
   3090 	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
   3091 	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
   3092 	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
   3093 	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
   3094 	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
   3095 	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
   3096 	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
   3097 	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
   3098 	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
   3099 	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
   3100 	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
   3101 	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
   3102 	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
   3103 	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
   3104 
   3105 	/* load TLS base */
   3106 	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
   3107 	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
   3108 	{0, 0, 0, 0, 0, [4]uint8{}},
   3109 }
   3110 
   3111 func isax(a *obj.Addr) bool {
   3112 	switch a.Reg {
   3113 	case REG_AX, REG_AL, REG_AH:
   3114 		return true
   3115 	}
   3116 
   3117 	if a.Index == REG_AX {
   3118 		return true
   3119 	}
   3120 	return false
   3121 }
   3122 
   3123 func subreg(p *obj.Prog, from int, to int) {
   3124 	if false { /* debug['Q'] */
   3125 		fmt.Printf("\n%v\ts/%v/%v/\n", p, Rconv(from), Rconv(to))
   3126 	}
   3127 
   3128 	if int(p.From.Reg) == from {
   3129 		p.From.Reg = int16(to)
   3130 		p.Ft = 0
   3131 	}
   3132 
   3133 	if int(p.To.Reg) == from {
   3134 		p.To.Reg = int16(to)
   3135 		p.Tt = 0
   3136 	}
   3137 
   3138 	if int(p.From.Index) == from {
   3139 		p.From.Index = int16(to)
   3140 		p.Ft = 0
   3141 	}
   3142 
   3143 	if int(p.To.Index) == from {
   3144 		p.To.Index = int16(to)
   3145 		p.Tt = 0
   3146 	}
   3147 
   3148 	if false { /* debug['Q'] */
   3149 		fmt.Printf("%v\n", p)
   3150 	}
   3151 }
   3152 
   3153 func mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
   3154 	switch op {
   3155 	case Pm, Pe, Pf2, Pf3:
   3156 		if osize != 1 {
   3157 			if op != Pm {
   3158 				ctxt.AsmBuf.Put1(byte(op))
   3159 			}
   3160 			ctxt.AsmBuf.Put1(Pm)
   3161 			z++
   3162 			op = int(o.op[z])
   3163 			break
   3164 		}
   3165 		fallthrough
   3166 
   3167 	default:
   3168 		if ctxt.AsmBuf.Len() == 0 || ctxt.AsmBuf.Last() != Pm {
   3169 			ctxt.AsmBuf.Put1(Pm)
   3170 		}
   3171 	}
   3172 
   3173 	ctxt.AsmBuf.Put1(byte(op))
   3174 	return z
   3175 }
   3176 
   3177 var bpduff1 = []byte{
   3178 	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
   3179 	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
   3180 }
   3181 
   3182 var bpduff2 = []byte{
   3183 	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
   3184 }
   3185 
   3186 // Emit VEX prefix and opcode byte.
   3187 // The three addresses are the r/m, vvvv, and reg fields.
   3188 // The reg and rm arguments appear in the same order as the
   3189 // arguments to asmand, which typically follows the call to asmvex.
   3190 // The final two arguments are the VEX prefix (see encoding above)
   3191 // and the opcode byte.
   3192 // For details about vex prefix see:
   3193 // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
   3194 func asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
   3195 	ctxt.Vexflag = 1
   3196 	rexR := 0
   3197 	if r != nil {
   3198 		rexR = regrex[r.Reg] & Rxr
   3199 	}
   3200 	rexB := 0
   3201 	rexX := 0
   3202 	if rm != nil {
   3203 		rexB = regrex[rm.Reg] & Rxb
   3204 		rexX = regrex[rm.Index] & Rxx
   3205 	}
   3206 	vexM := (vex >> 3) & 0xF
   3207 	vexWLP := vex & 0x87
   3208 	vexV := byte(0)
   3209 	if v != nil {
   3210 		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
   3211 	}
   3212 	vexV ^= 0xF
   3213 	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
   3214 		// Can use 2-byte encoding.
   3215 		ctxt.AsmBuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
   3216 	} else {
   3217 		// Must use 3-byte encoding.
   3218 		ctxt.AsmBuf.Put3(0xc4,
   3219 			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
   3220 			vexV<<3|vexWLP,
   3221 		)
   3222 	}
   3223 	ctxt.AsmBuf.Put1(opcode)
   3224 }
   3225 
   3226 func doasm(ctxt *obj.Link, p *obj.Prog) {
   3227 	ctxt.Curp = p // TODO
   3228 
   3229 	o := opindex[p.As&obj.AMask]
   3230 
   3231 	if o == nil {
   3232 		ctxt.Diag("asmins: missing op %v", p)
   3233 		return
   3234 	}
   3235 
   3236 	pre := prefixof(ctxt, p, &p.From)
   3237 	if pre != 0 {
   3238 		ctxt.AsmBuf.Put1(byte(pre))
   3239 	}
   3240 	pre = prefixof(ctxt, p, &p.To)
   3241 	if pre != 0 {
   3242 		ctxt.AsmBuf.Put1(byte(pre))
   3243 	}
   3244 
   3245 	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
   3246 	// which encodes as SHRQ $32(DX*0), AX.
   3247 	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
   3248 	// Change encoding generated by assemblers and compilers and remove.
   3249 	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
   3250 		p.From3 = new(obj.Addr)
   3251 		p.From3.Type = obj.TYPE_REG
   3252 		p.From3.Reg = p.From.Index
   3253 		p.From.Index = 0
   3254 	}
   3255 
   3256 	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
   3257 	// Change encoding generated by assemblers and compilers (if any) and remove.
   3258 	switch p.As {
   3259 	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
   3260 		if p.From3Type() == obj.TYPE_NONE {
   3261 			p.From3 = new(obj.Addr)
   3262 			*p.From3 = p.From
   3263 			p.From = obj.Addr{}
   3264 			p.From.Type = obj.TYPE_CONST
   3265 			p.From.Offset = p.To.Offset
   3266 			p.To.Offset = 0
   3267 		}
   3268 	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
   3269 		if p.From3Type() == obj.TYPE_NONE {
   3270 			p.From3 = new(obj.Addr)
   3271 			*p.From3 = p.To
   3272 			p.To = obj.Addr{}
   3273 			p.To.Type = obj.TYPE_CONST
   3274 			p.To.Offset = p.From3.Offset
   3275 			p.From3.Offset = 0
   3276 		}
   3277 	}
   3278 
   3279 	if p.Ft == 0 {
   3280 		p.Ft = uint8(oclass(ctxt, p, &p.From))
   3281 	}
   3282 	if p.Tt == 0 {
   3283 		p.Tt = uint8(oclass(ctxt, p, &p.To))
   3284 	}
   3285 
   3286 	ft := int(p.Ft) * Ymax
   3287 	f3t := Ynone * Ymax
   3288 	if p.From3 != nil {
   3289 		f3t = oclass(ctxt, p, p.From3) * Ymax
   3290 	}
   3291 	tt := int(p.Tt) * Ymax
   3292 
   3293 	xo := obj.Bool2int(o.op[0] == 0x0f)
   3294 	z := 0
   3295 	var a *obj.Addr
   3296 	var l int
   3297 	var op int
   3298 	var q *obj.Prog
   3299 	var r *obj.Reloc
   3300 	var rel obj.Reloc
   3301 	var v int64
   3302 	for i := range o.ytab {
   3303 		yt := &o.ytab[i]
   3304 		if ycover[ft+int(yt.from)] != 0 && ycover[f3t+int(yt.from3)] != 0 && ycover[tt+int(yt.to)] != 0 {
   3305 			switch o.prefix {
   3306 			case Px1: /* first option valid only in 32-bit mode */
   3307 				if ctxt.Mode == 64 && z == 0 {
   3308 					z += int(yt.zoffset) + xo
   3309 					continue
   3310 				}
   3311 			case Pq: /* 16 bit escape and opcode escape */
   3312 				ctxt.AsmBuf.Put2(Pe, Pm)
   3313 
   3314 			case Pq3: /* 16 bit escape and opcode escape + REX.W */
   3315 				ctxt.Rexflag |= Pw
   3316 				ctxt.AsmBuf.Put2(Pe, Pm)
   3317 
   3318 			case Pq4: /*  66 0F 38 */
   3319 				ctxt.AsmBuf.Put3(0x66, 0x0F, 0x38)
   3320 
   3321 			case Pf2, /* xmm opcode escape */
   3322 				Pf3:
   3323 				ctxt.AsmBuf.Put2(o.prefix, Pm)
   3324 
   3325 			case Pef3:
   3326 				ctxt.AsmBuf.Put3(Pe, Pf3, Pm)
   3327 
   3328 			case Pfw: /* xmm opcode escape + REX.W */
   3329 				ctxt.Rexflag |= Pw
   3330 				ctxt.AsmBuf.Put2(Pf3, Pm)
   3331 
   3332 			case Pm: /* opcode escape */
   3333 				ctxt.AsmBuf.Put1(Pm)
   3334 
   3335 			case Pe: /* 16 bit escape */
   3336 				ctxt.AsmBuf.Put1(Pe)
   3337 
   3338 			case Pw: /* 64-bit escape */
   3339 				if p.Mode != 64 {
   3340 					ctxt.Diag("asmins: illegal 64: %v", p)
   3341 				}
   3342 				ctxt.Rexflag |= Pw
   3343 
   3344 			case Pw8: /* 64-bit escape if z >= 8 */
   3345 				if z >= 8 {
   3346 					if p.Mode != 64 {
   3347 						ctxt.Diag("asmins: illegal 64: %v", p)
   3348 					}
   3349 					ctxt.Rexflag |= Pw
   3350 				}
   3351 
   3352 			case Pb: /* botch */
   3353 				if p.Mode != 64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
   3354 					goto bad
   3355 				}
   3356 				// NOTE(rsc): This is probably safe to do always,
   3357 				// but when enabled it chooses different encodings
   3358 				// than the old cmd/internal/obj/i386 code did,
   3359 				// which breaks our "same bits out" checks.
   3360 				// In particular, CMPB AX, $0 encodes as 80 f8 00
   3361 				// in the original obj/i386, and it would encode
   3362 				// (using a valid, shorter form) as 3c 00 if we enabled
   3363 				// the call to bytereg here.
   3364 				if p.Mode == 64 {
   3365 					bytereg(&p.From, &p.Ft)
   3366 					bytereg(&p.To, &p.Tt)
   3367 				}
   3368 
   3369 			case P32: /* 32 bit but illegal if 64-bit mode */
   3370 				if p.Mode == 64 {
   3371 					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
   3372 				}
   3373 
   3374 			case Py: /* 64-bit only, no prefix */
   3375 				if p.Mode != 64 {
   3376 					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
   3377 				}
   3378 
   3379 			case Py1: /* 64-bit only if z < 1, no prefix */
   3380 				if z < 1 && p.Mode != 64 {
   3381 					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
   3382 				}
   3383 
   3384 			case Py3: /* 64-bit only if z < 3, no prefix */
   3385 				if z < 3 && p.Mode != 64 {
   3386 					ctxt.Diag("asmins: illegal in %d-bit mode: %v", p.Mode, p)
   3387 				}
   3388 			}
   3389 
   3390 			if z >= len(o.op) {
   3391 				log.Fatalf("asmins bad table %v", p)
   3392 			}
   3393 			op = int(o.op[z])
   3394 			// In vex case 0x0f is actually VEX_256_F2_0F_WIG
   3395 			if op == 0x0f && o.prefix != Pvex {
   3396 				ctxt.AsmBuf.Put1(byte(op))
   3397 				z++
   3398 				op = int(o.op[z])
   3399 			}
   3400 
   3401 			switch yt.zcase {
   3402 			default:
   3403 				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
   3404 				return
   3405 
   3406 			case Zpseudo:
   3407 				break
   3408 
   3409 			case Zlit:
   3410 				for ; ; z++ {
   3411 					op = int(o.op[z])
   3412 					if op == 0 {
   3413 						break
   3414 					}
   3415 					ctxt.AsmBuf.Put1(byte(op))
   3416 				}
   3417 
   3418 			case Zlitm_r:
   3419 				for ; ; z++ {
   3420 					op = int(o.op[z])
   3421 					if op == 0 {
   3422 						break
   3423 					}
   3424 					ctxt.AsmBuf.Put1(byte(op))
   3425 				}
   3426 				asmand(ctxt, p, &p.From, &p.To)
   3427 
   3428 			case Zmb_r:
   3429 				bytereg(&p.From, &p.Ft)
   3430 				fallthrough
   3431 
   3432 			case Zm_r:
   3433 				ctxt.AsmBuf.Put1(byte(op))
   3434 				asmand(ctxt, p, &p.From, &p.To)
   3435 
   3436 			case Zm2_r:
   3437 				ctxt.AsmBuf.Put2(byte(op), o.op[z+1])
   3438 				asmand(ctxt, p, &p.From, &p.To)
   3439 
   3440 			case Zm_r_xm:
   3441 				mediaop(ctxt, o, op, int(yt.zoffset), z)
   3442 				asmand(ctxt, p, &p.From, &p.To)
   3443 
   3444 			case Zm_r_xm_nr:
   3445 				ctxt.Rexflag = 0
   3446 				mediaop(ctxt, o, op, int(yt.zoffset), z)
   3447 				asmand(ctxt, p, &p.From, &p.To)
   3448 
   3449 			case Zm_r_i_xm:
   3450 				mediaop(ctxt, o, op, int(yt.zoffset), z)
   3451 				asmand(ctxt, p, &p.From, p.From3)
   3452 				ctxt.AsmBuf.Put1(byte(p.To.Offset))
   3453 
   3454 			case Zibm_r, Zibr_m:
   3455 				for {
   3456 					tmp1 := z
   3457 					z++
   3458 					op = int(o.op[tmp1])
   3459 					if op == 0 {
   3460 						break
   3461 					}
   3462 					ctxt.AsmBuf.Put1(byte(op))
   3463 				}
   3464 				if yt.zcase == Zibr_m {
   3465 					asmand(ctxt, p, &p.To, p.From3)
   3466 				} else {
   3467 					asmand(ctxt, p, p.From3, &p.To)
   3468 				}
   3469 				ctxt.AsmBuf.Put1(byte(p.From.Offset))
   3470 
   3471 			case Zaut_r:
   3472 				ctxt.AsmBuf.Put1(0x8d) // leal
   3473 				if p.From.Type != obj.TYPE_ADDR {
   3474 					ctxt.Diag("asmins: Zaut sb type ADDR")
   3475 				}
   3476 				p.From.Type = obj.TYPE_MEM
   3477 				asmand(ctxt, p, &p.From, &p.To)
   3478 				p.From.Type = obj.TYPE_ADDR
   3479 
   3480 			case Zm_o:
   3481 				ctxt.AsmBuf.Put1(byte(op))
   3482 				asmando(ctxt, p, &p.From, int(o.op[z+1]))
   3483 
   3484 			case Zr_m:
   3485 				ctxt.AsmBuf.Put1(byte(op))
   3486 				asmand(ctxt, p, &p.To, &p.From)
   3487 
   3488 			case Zvex_rm_v_r:
   3489 				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
   3490 				asmand(ctxt, p, &p.From, &p.To)
   3491 
   3492 			case Zvex_i_r_v:
   3493 				asmvex(ctxt, p.From3, &p.To, nil, o.op[z], o.op[z+1])
   3494 				regnum := byte(0x7)
   3495 				if p.From3.Reg >= REG_X0 && p.From3.Reg <= REG_X15 {
   3496 					regnum &= byte(p.From3.Reg - REG_X0)
   3497 				} else {
   3498 					regnum &= byte(p.From3.Reg - REG_Y0)
   3499 				}
   3500 				ctxt.AsmBuf.Put1(byte(o.op[z+2]) | regnum)
   3501 				ctxt.AsmBuf.Put1(byte(p.From.Offset))
   3502 
   3503 			case Zvex_i_rm_v_r:
   3504 				asmvex(ctxt, &p.From, p.From3, &p.To, o.op[z], o.op[z+1])
   3505 				asmand(ctxt, p, &p.From, &p.To)
   3506 				ctxt.AsmBuf.Put1(byte(p.From3.Offset))
   3507 
   3508 			case Zvex_i_rm_r:
   3509 				asmvex(ctxt, p.From3, nil, &p.To, o.op[z], o.op[z+1])
   3510 				asmand(ctxt, p, p.From3, &p.To)
   3511 				ctxt.AsmBuf.Put1(byte(p.From.Offset))
   3512 
   3513 			case Zvex_v_rm_r:
   3514 				asmvex(ctxt, p.From3, &p.From, &p.To, o.op[z], o.op[z+1])
   3515 				asmand(ctxt, p, p.From3, &p.To)
   3516 
   3517 			case Zvex_r_v_rm:
   3518 				asmvex(ctxt, &p.To, p.From3, &p.From, o.op[z], o.op[z+1])
   3519 				asmand(ctxt, p, &p.To, &p.From)
   3520 
   3521 			case Zr_m_xm:
   3522 				mediaop(ctxt, o, op, int(yt.zoffset), z)
   3523 				asmand(ctxt, p, &p.To, &p.From)
   3524 
   3525 			case Zr_m_xm_nr:
   3526 				ctxt.Rexflag = 0
   3527 				mediaop(ctxt, o, op, int(yt.zoffset), z)
   3528 				asmand(ctxt, p, &p.To, &p.From)
   3529 
   3530 			case Zo_m:
   3531 				ctxt.AsmBuf.Put1(byte(op))
   3532 				asmando(ctxt, p, &p.To, int(o.op[z+1]))
   3533 
   3534 			case Zcallindreg:
   3535 				r = obj.Addrel(ctxt.Cursym)
   3536 				r.Off = int32(p.Pc)
   3537 				r.Type = obj.R_CALLIND
   3538 				r.Siz = 0
   3539 				fallthrough
   3540 
   3541 			case Zo_m64:
   3542 				ctxt.AsmBuf.Put1(byte(op))
   3543 				asmandsz(ctxt, p, &p.To, int(o.op[z+1]), 0, 1)
   3544 
   3545 			case Zm_ibo:
   3546 				ctxt.AsmBuf.Put1(byte(op))
   3547 				asmando(ctxt, p, &p.From, int(o.op[z+1]))
   3548 				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
   3549 
   3550 			case Zibo_m:
   3551 				ctxt.AsmBuf.Put1(byte(op))
   3552 				asmando(ctxt, p, &p.To, int(o.op[z+1]))
   3553 				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
   3554 
   3555 			case Zibo_m_xm:
   3556 				z = mediaop(ctxt, o, op, int(yt.zoffset), z)
   3557 				asmando(ctxt, p, &p.To, int(o.op[z+1]))
   3558 				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
   3559 
   3560 			case Z_ib, Zib_:
   3561 				if yt.zcase == Zib_ {
   3562 					a = &p.From
   3563 				} else {
   3564 					a = &p.To
   3565 				}
   3566 				ctxt.AsmBuf.Put1(byte(op))
   3567 				if p.As == AXABORT {
   3568 					ctxt.AsmBuf.Put1(o.op[z+1])
   3569 				}
   3570 				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, a, nil)))
   3571 
   3572 			case Zib_rp:
   3573 				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
   3574 				ctxt.AsmBuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
   3575 
   3576 			case Zil_rp:
   3577 				ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
   3578 				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
   3579 				if o.prefix == Pe {
   3580 					v = vaddr(ctxt, p, &p.From, nil)
   3581 					ctxt.AsmBuf.PutInt16(int16(v))
   3582 				} else {
   3583 					relput4(ctxt, p, &p.From)
   3584 				}
   3585 
   3586 			case Zo_iw:
   3587 				ctxt.AsmBuf.Put1(byte(op))
   3588 				if p.From.Type != obj.TYPE_NONE {
   3589 					v = vaddr(ctxt, p, &p.From, nil)
   3590 					ctxt.AsmBuf.PutInt16(int16(v))
   3591 				}
   3592 
   3593 			case Ziq_rp:
   3594 				v = vaddr(ctxt, p, &p.From, &rel)
   3595 				l = int(v >> 32)
   3596 				if l == 0 && rel.Siz != 8 {
   3597 					//p->mark |= 0100;
   3598 					//print("zero: %llux %v\n", v, p);
   3599 					ctxt.Rexflag &^= (0x40 | Rxw)
   3600 
   3601 					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
   3602 					ctxt.AsmBuf.Put1(byte(0xb8 + reg[p.To.Reg]))
   3603 					if rel.Type != 0 {
   3604 						r = obj.Addrel(ctxt.Cursym)
   3605 						*r = rel
   3606 						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
   3607 					}
   3608 
   3609 					ctxt.AsmBuf.PutInt32(int32(v))
   3610 				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
   3611 
   3612 					//p->mark |= 0100;
   3613 					//print("sign: %llux %v\n", v, p);
   3614 					ctxt.AsmBuf.Put1(0xc7)
   3615 					asmando(ctxt, p, &p.To, 0)
   3616 
   3617 					ctxt.AsmBuf.PutInt32(int32(v)) // need all 8
   3618 				} else {
   3619 					//print("all: %llux %v\n", v, p);
   3620 					ctxt.Rexflag |= regrex[p.To.Reg] & Rxb
   3621 					ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
   3622 					if rel.Type != 0 {
   3623 						r = obj.Addrel(ctxt.Cursym)
   3624 						*r = rel
   3625 						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
   3626 					}
   3627 
   3628 					ctxt.AsmBuf.PutInt64(v)
   3629 				}
   3630 
   3631 			case Zib_rr:
   3632 				ctxt.AsmBuf.Put1(byte(op))
   3633 				asmand(ctxt, p, &p.To, &p.To)
   3634 				ctxt.AsmBuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
   3635 
   3636 			case Z_il, Zil_:
   3637 				if yt.zcase == Zil_ {
   3638 					a = &p.From
   3639 				} else {
   3640 					a = &p.To
   3641 				}
   3642 				ctxt.AsmBuf.Put1(byte(op))
   3643 				if o.prefix == Pe {
   3644 					v = vaddr(ctxt, p, a, nil)
   3645 					ctxt.AsmBuf.PutInt16(int16(v))
   3646 				} else {
   3647 					relput4(ctxt, p, a)
   3648 				}
   3649 
   3650 			case Zm_ilo, Zilo_m:
   3651 				ctxt.AsmBuf.Put1(byte(op))
   3652 				if yt.zcase == Zilo_m {
   3653 					a = &p.From
   3654 					asmando(ctxt, p, &p.To, int(o.op[z+1]))
   3655 				} else {
   3656 					a = &p.To
   3657 					asmando(ctxt, p, &p.From, int(o.op[z+1]))
   3658 				}
   3659 
   3660 				if o.prefix == Pe {
   3661 					v = vaddr(ctxt, p, a, nil)
   3662 					ctxt.AsmBuf.PutInt16(int16(v))
   3663 				} else {
   3664 					relput4(ctxt, p, a)
   3665 				}
   3666 
   3667 			case Zil_rr:
   3668 				ctxt.AsmBuf.Put1(byte(op))
   3669 				asmand(ctxt, p, &p.To, &p.To)
   3670 				if o.prefix == Pe {
   3671 					v = vaddr(ctxt, p, &p.From, nil)
   3672 					ctxt.AsmBuf.PutInt16(int16(v))
   3673 				} else {
   3674 					relput4(ctxt, p, &p.From)
   3675 				}
   3676 
   3677 			case Z_rp:
   3678 				ctxt.Rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
   3679 				ctxt.AsmBuf.Put1(byte(op + reg[p.To.Reg]))
   3680 
   3681 			case Zrp_:
   3682 				ctxt.Rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
   3683 				ctxt.AsmBuf.Put1(byte(op + reg[p.From.Reg]))
   3684 
   3685 			case Zclr:
   3686 				ctxt.Rexflag &^= Pw
   3687 				ctxt.AsmBuf.Put1(byte(op))
   3688 				asmand(ctxt, p, &p.To, &p.To)
   3689 
   3690 			case Zcallcon, Zjmpcon:
   3691 				if yt.zcase == Zcallcon {
   3692 					ctxt.AsmBuf.Put1(byte(op))
   3693 				} else {
   3694 					ctxt.AsmBuf.Put1(o.op[z+1])
   3695 				}
   3696 				r = obj.Addrel(ctxt.Cursym)
   3697 				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
   3698 				r.Type = obj.R_PCREL
   3699 				r.Siz = 4
   3700 				r.Add = p.To.Offset
   3701 				ctxt.AsmBuf.PutInt32(0)
   3702 
   3703 			case Zcallind:
   3704 				ctxt.AsmBuf.Put2(byte(op), o.op[z+1])
   3705 				r = obj.Addrel(ctxt.Cursym)
   3706 				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
   3707 				if p.Mode == 64 {
   3708 					r.Type = obj.R_PCREL
   3709 				} else {
   3710 					r.Type = obj.R_ADDR
   3711 				}
   3712 				r.Siz = 4
   3713 				r.Add = p.To.Offset
   3714 				r.Sym = p.To.Sym
   3715 				ctxt.AsmBuf.PutInt32(0)
   3716 
   3717 			case Zcall, Zcallduff:
   3718 				if p.To.Sym == nil {
   3719 					ctxt.Diag("call without target")
   3720 					log.Fatalf("bad code")
   3721 				}
   3722 
   3723 				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
   3724 					ctxt.Diag("directly calling duff when dynamically linking Go")
   3725 				}
   3726 
   3727 				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && p.Mode == 64 {
   3728 					// Maintain BP around call, since duffcopy/duffzero can't do it
   3729 					// (the call jumps into the middle of the function).
   3730 					// This makes it possible to see call sites for duffcopy/duffzero in
   3731 					// BP-based profiling tools like Linux perf (which is the
   3732 					// whole point of obj.Framepointer_enabled).
   3733 					// MOVQ BP, -16(SP)
   3734 					// LEAQ -16(SP), BP
   3735 					ctxt.AsmBuf.Put(bpduff1)
   3736 				}
   3737 				ctxt.AsmBuf.Put1(byte(op))
   3738 				r = obj.Addrel(ctxt.Cursym)
   3739 				r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
   3740 				r.Sym = p.To.Sym
   3741 				r.Add = p.To.Offset
   3742 				r.Type = obj.R_CALL
   3743 				r.Siz = 4
   3744 				ctxt.AsmBuf.PutInt32(0)
   3745 
   3746 				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && p.Mode == 64 {
   3747 					// Pop BP pushed above.
   3748 					// MOVQ 0(BP), BP
   3749 					ctxt.AsmBuf.Put(bpduff2)
   3750 				}
   3751 
   3752 			// TODO: jump across functions needs reloc
   3753 			case Zbr, Zjmp, Zloop:
   3754 				if p.As == AXBEGIN {
   3755 					ctxt.AsmBuf.Put1(byte(op))
   3756 				}
   3757 				if p.To.Sym != nil {
   3758 					if yt.zcase != Zjmp {
   3759 						ctxt.Diag("branch to ATEXT")
   3760 						log.Fatalf("bad code")
   3761 					}
   3762 
   3763 					ctxt.AsmBuf.Put1(o.op[z+1])
   3764 					r = obj.Addrel(ctxt.Cursym)
   3765 					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
   3766 					r.Sym = p.To.Sym
   3767 					r.Type = obj.R_PCREL
   3768 					r.Siz = 4
   3769 					ctxt.AsmBuf.PutInt32(0)
   3770 					break
   3771 				}
   3772 
   3773 				// Assumes q is in this function.
   3774 				// TODO: Check in input, preserve in brchain.
   3775 
   3776 				// Fill in backward jump now.
   3777 				q = p.Pcond
   3778 
   3779 				if q == nil {
   3780 					ctxt.Diag("jmp/branch/loop without target")
   3781 					log.Fatalf("bad code")
   3782 				}
   3783 
   3784 				if p.Back&1 != 0 {
   3785 					v = q.Pc - (p.Pc + 2)
   3786 					if v >= -128 && p.As != AXBEGIN {
   3787 						if p.As == AJCXZL {
   3788 							ctxt.AsmBuf.Put1(0x67)
   3789 						}
   3790 						ctxt.AsmBuf.Put2(byte(op), byte(v))
   3791 					} else if yt.zcase == Zloop {
   3792 						ctxt.Diag("loop too far: %v", p)
   3793 					} else {
   3794 						v -= 5 - 2
   3795 						if p.As == AXBEGIN {
   3796 							v--
   3797 						}
   3798 						if yt.zcase == Zbr {
   3799 							ctxt.AsmBuf.Put1(0x0f)
   3800 							v--
   3801 						}
   3802 
   3803 						ctxt.AsmBuf.Put1(o.op[z+1])
   3804 						ctxt.AsmBuf.PutInt32(int32(v))
   3805 					}
   3806 
   3807 					break
   3808 				}
   3809 
   3810 				// Annotate target; will fill in later.
   3811 				p.Forwd = q.Rel
   3812 
   3813 				q.Rel = p
   3814 				if p.Back&2 != 0 && p.As != AXBEGIN { // short
   3815 					if p.As == AJCXZL {
   3816 						ctxt.AsmBuf.Put1(0x67)
   3817 					}
   3818 					ctxt.AsmBuf.Put2(byte(op), 0)
   3819 				} else if yt.zcase == Zloop {
   3820 					ctxt.Diag("loop too far: %v", p)
   3821 				} else {
   3822 					if yt.zcase == Zbr {
   3823 						ctxt.AsmBuf.Put1(0x0f)
   3824 					}
   3825 					ctxt.AsmBuf.Put1(o.op[z+1])
   3826 					ctxt.AsmBuf.PutInt32(0)
   3827 				}
   3828 
   3829 				break
   3830 
   3831 			/*
   3832 				v = q->pc - p->pc - 2;
   3833 				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
   3834 					*ctxt->andptr++ = op;
   3835 					*ctxt->andptr++ = v;
   3836 				} else {
   3837 					v -= 5-2;
   3838 					if(yt.zcase == Zbr) {
   3839 						*ctxt->andptr++ = 0x0f;
   3840 						v--;
   3841 					}
   3842 					*ctxt->andptr++ = o->op[z+1];
   3843 					*ctxt->andptr++ = v;
   3844 					*ctxt->andptr++ = v>>8;
   3845 					*ctxt->andptr++ = v>>16;
   3846 					*ctxt->andptr++ = v>>24;
   3847 				}
   3848 			*/
   3849 
   3850 			case Zbyte:
   3851 				v = vaddr(ctxt, p, &p.From, &rel)
   3852 				if rel.Siz != 0 {
   3853 					rel.Siz = uint8(op)
   3854 					r = obj.Addrel(ctxt.Cursym)
   3855 					*r = rel
   3856 					r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
   3857 				}
   3858 
   3859 				ctxt.AsmBuf.Put1(byte(v))
   3860 				if op > 1 {
   3861 					ctxt.AsmBuf.Put1(byte(v >> 8))
   3862 					if op > 2 {
   3863 						ctxt.AsmBuf.PutInt16(int16(v >> 16))
   3864 						if op > 4 {
   3865 							ctxt.AsmBuf.PutInt32(int32(v >> 32))
   3866 						}
   3867 					}
   3868 				}
   3869 			}
   3870 
   3871 			return
   3872 		}
   3873 		z += int(yt.zoffset) + xo
   3874 	}
   3875 	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
   3876 		var pp obj.Prog
   3877 		var t []byte
   3878 		if p.As == mo[0].as {
   3879 			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
   3880 				t = mo[0].op[:]
   3881 				switch mo[0].code {
   3882 				default:
   3883 					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
   3884 
   3885 				case 0: /* lit */
   3886 					for z = 0; t[z] != E; z++ {
   3887 						ctxt.AsmBuf.Put1(t[z])
   3888 					}
   3889 
   3890 				case 1: /* r,m */
   3891 					ctxt.AsmBuf.Put1(t[0])
   3892 					asmando(ctxt, p, &p.To, int(t[1]))
   3893 
   3894 				case 2: /* m,r */
   3895 					ctxt.AsmBuf.Put1(t[0])
   3896 					asmando(ctxt, p, &p.From, int(t[1]))
   3897 
   3898 				case 3: /* r,m - 2op */
   3899 					ctxt.AsmBuf.Put2(t[0], t[1])
   3900 					asmando(ctxt, p, &p.To, int(t[2]))
   3901 					ctxt.Rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
   3902 
   3903 				case 4: /* m,r - 2op */
   3904 					ctxt.AsmBuf.Put2(t[0], t[1])
   3905 					asmando(ctxt, p, &p.From, int(t[2]))
   3906 					ctxt.Rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
   3907 
   3908 				case 5: /* load full pointer, trash heap */
   3909 					if t[0] != 0 {
   3910 						ctxt.AsmBuf.Put1(t[0])
   3911 					}
   3912 					switch p.To.Index {
   3913 					default:
   3914 						goto bad
   3915 
   3916 					case REG_DS:
   3917 						ctxt.AsmBuf.Put1(0xc5)
   3918 
   3919 					case REG_SS:
   3920 						ctxt.AsmBuf.Put2(0x0f, 0xb2)
   3921 
   3922 					case REG_ES:
   3923 						ctxt.AsmBuf.Put1(0xc4)
   3924 
   3925 					case REG_FS:
   3926 						ctxt.AsmBuf.Put2(0x0f, 0xb4)
   3927 
   3928 					case REG_GS:
   3929 						ctxt.AsmBuf.Put2(0x0f, 0xb5)
   3930 					}
   3931 
   3932 					asmand(ctxt, p, &p.From, &p.To)
   3933 
   3934 				case 6: /* double shift */
   3935 					if t[0] == Pw {
   3936 						if p.Mode != 64 {
   3937 							ctxt.Diag("asmins: illegal 64: %v", p)
   3938 						}
   3939 						ctxt.Rexflag |= Pw
   3940 						t = t[1:]
   3941 					} else if t[0] == Pe {
   3942 						ctxt.AsmBuf.Put1(Pe)
   3943 						t = t[1:]
   3944 					}
   3945 
   3946 					switch p.From.Type {
   3947 					default:
   3948 						goto bad
   3949 
   3950 					case obj.TYPE_CONST:
   3951 						ctxt.AsmBuf.Put2(0x0f, t[0])
   3952 						asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
   3953 						ctxt.AsmBuf.Put1(byte(p.From.Offset))
   3954 
   3955 					case obj.TYPE_REG:
   3956 						switch p.From.Reg {
   3957 						default:
   3958 							goto bad
   3959 
   3960 						case REG_CL, REG_CX:
   3961 							ctxt.AsmBuf.Put2(0x0f, t[1])
   3962 							asmandsz(ctxt, p, &p.To, reg[p.From3.Reg], regrex[p.From3.Reg], 0)
   3963 						}
   3964 					}
   3965 
   3966 				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
   3967 				// where you load the TLS base register into a register and then index off that
   3968 				// register to access the actual TLS variables. Systems that allow direct TLS access
   3969 				// are handled in prefixof above and should not be listed here.
   3970 				case 7: /* mov tls, r */
   3971 					if p.Mode == 64 && p.As != AMOVQ || p.Mode == 32 && p.As != AMOVL {
   3972 						ctxt.Diag("invalid load of TLS: %v", p)
   3973 					}
   3974 
   3975 					if p.Mode == 32 {
   3976 						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
   3977 						// where you load the TLS base register into a register and then index off that
   3978 						// register to access the actual TLS variables. Systems that allow direct TLS access
   3979 						// are handled in prefixof above and should not be listed here.
   3980 						switch ctxt.Headtype {
   3981 						default:
   3982 							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
   3983 
   3984 						case obj.Hlinux,
   3985 							obj.Hnacl:
   3986 							if ctxt.Flag_shared {
   3987 								// Note that this is not generating the same insns as the other cases.
   3988 								//     MOV TLS, dst
   3989 								// becomes
   3990 								//     call __x86.get_pc_thunk.dst
   3991 								//     movl (gotpc + g@gotntpoff)(dst), dst
   3992 								// which is encoded as
   3993 								//     call __x86.get_pc_thunk.dst
   3994 								//     movq 0(dst), dst
   3995 								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
   3996 								// is g, which we can't check here, but will when we assemble the second
   3997 								// instruction.
   3998 								dst := p.To.Reg
   3999 								ctxt.AsmBuf.Put1(0xe8)
   4000 								r = obj.Addrel(ctxt.Cursym)
   4001 								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
   4002 								r.Type = obj.R_CALL
   4003 								r.Siz = 4
   4004 								r.Sym = obj.Linklookup(ctxt, "__x86.get_pc_thunk."+strings.ToLower(Rconv(int(dst))), 0)
   4005 								ctxt.AsmBuf.PutInt32(0)
   4006 
   4007 								ctxt.AsmBuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
   4008 								r = obj.Addrel(ctxt.Cursym)
   4009 								r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
   4010 								r.Type = obj.R_TLS_IE
   4011 								r.Siz = 4
   4012 								r.Add = 2
   4013 								ctxt.AsmBuf.PutInt32(0)
   4014 							} else {
   4015 								// ELF TLS base is 0(GS).
   4016 								pp.From = p.From
   4017 
   4018 								pp.From.Type = obj.TYPE_MEM
   4019 								pp.From.Reg = REG_GS
   4020 								pp.From.Offset = 0
   4021 								pp.From.Index = REG_NONE
   4022 								pp.From.Scale = 0
   4023 								ctxt.AsmBuf.Put2(0x65, // GS
   4024 									0x8B)
   4025 								asmand(ctxt, p, &pp.From, &p.To)
   4026 							}
   4027 						case obj.Hplan9:
   4028 							if ctxt.Plan9privates == nil {
   4029 								ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
   4030 							}
   4031 							pp.From = obj.Addr{}
   4032 							pp.From.Type = obj.TYPE_MEM
   4033 							pp.From.Name = obj.NAME_EXTERN
   4034 							pp.From.Sym = ctxt.Plan9privates
   4035 							pp.From.Offset = 0
   4036 							pp.From.Index = REG_NONE
   4037 							ctxt.AsmBuf.Put1(0x8B)
   4038 							asmand(ctxt, p, &pp.From, &p.To)
   4039 
   4040 						case obj.Hwindows, obj.Hwindowsgui:
   4041 							// Windows TLS base is always 0x14(FS).
   4042 							pp.From = p.From
   4043 
   4044 							pp.From.Type = obj.TYPE_MEM
   4045 							pp.From.Reg = REG_FS
   4046 							pp.From.Offset = 0x14
   4047 							pp.From.Index = REG_NONE
   4048 							pp.From.Scale = 0
   4049 							ctxt.AsmBuf.Put2(0x64, // FS
   4050 								0x8B)
   4051 							asmand(ctxt, p, &pp.From, &p.To)
   4052 						}
   4053 						break
   4054 					}
   4055 
   4056 					switch ctxt.Headtype {
   4057 					default:
   4058 						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
   4059 
   4060 					case obj.Hlinux:
   4061 						if !ctxt.Flag_shared {
   4062 							log.Fatalf("unknown TLS base location for linux without -shared")
   4063 						}
   4064 						// Note that this is not generating the same insn as the other cases.
   4065 						//     MOV TLS, R_to
   4066 						// becomes
   4067 						//     movq g@gottpoff(%rip), R_to
   4068 						// which is encoded as
   4069 						//     movq 0(%rip), R_to
   4070 						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
   4071 						// is g, which we can't check here, but will when we assemble the second
   4072 						// instruction.
   4073 						ctxt.Rexflag = Pw | (regrex[p.To.Reg] & Rxr)
   4074 
   4075 						ctxt.AsmBuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
   4076 						r = obj.Addrel(ctxt.Cursym)
   4077 						r.Off = int32(p.Pc + int64(ctxt.AsmBuf.Len()))
   4078 						r.Type = obj.R_TLS_IE
   4079 						r.Siz = 4
   4080 						r.Add = -4
   4081 						ctxt.AsmBuf.PutInt32(0)
   4082 
   4083 					case obj.Hplan9:
   4084 						if ctxt.Plan9privates == nil {
   4085 							ctxt.Plan9privates = obj.Linklookup(ctxt, "_privates", 0)
   4086 						}
   4087 						pp.From = obj.Addr{}
   4088 						pp.From.Type = obj.TYPE_MEM
   4089 						pp.From.Name = obj.NAME_EXTERN
   4090 						pp.From.Sym = ctxt.Plan9privates
   4091 						pp.From.Offset = 0
   4092 						pp.From.Index = REG_NONE
   4093 						ctxt.Rexflag |= Pw
   4094 						ctxt.AsmBuf.Put1(0x8B)
   4095 						asmand(ctxt, p, &pp.From, &p.To)
   4096 
   4097 					case obj.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
   4098 						// TLS base is 0(FS).
   4099 						pp.From = p.From
   4100 
   4101 						pp.From.Type = obj.TYPE_MEM
   4102 						pp.From.Name = obj.NAME_NONE
   4103 						pp.From.Reg = REG_NONE
   4104 						pp.From.Offset = 0
   4105 						pp.From.Index = REG_NONE
   4106 						pp.From.Scale = 0
   4107 						ctxt.Rexflag |= Pw
   4108 						ctxt.AsmBuf.Put2(0x64, // FS
   4109 							0x8B)
   4110 						asmand(ctxt, p, &pp.From, &p.To)
   4111 
   4112 					case obj.Hwindows, obj.Hwindowsgui:
   4113 						// Windows TLS base is always 0x28(GS).
   4114 						pp.From = p.From
   4115 
   4116 						pp.From.Type = obj.TYPE_MEM
   4117 						pp.From.Name = obj.NAME_NONE
   4118 						pp.From.Reg = REG_GS
   4119 						pp.From.Offset = 0x28
   4120 						pp.From.Index = REG_NONE
   4121 						pp.From.Scale = 0
   4122 						ctxt.Rexflag |= Pw
   4123 						ctxt.AsmBuf.Put2(0x65, // GS
   4124 							0x8B)
   4125 						asmand(ctxt, p, &pp.From, &p.To)
   4126 					}
   4127 				}
   4128 				return
   4129 			}
   4130 		}
   4131 	}
   4132 	goto bad
   4133 
   4134 bad:
   4135 	if p.Mode != 64 {
   4136 		/*
   4137 		 * here, the assembly has failed.
   4138 		 * if its a byte instruction that has
   4139 		 * unaddressable registers, try to
   4140 		 * exchange registers and reissue the
   4141 		 * instruction with the operands renamed.
   4142 		 */
   4143 		pp := *p
   4144 
   4145 		unbytereg(&pp.From, &pp.Ft)
   4146 		unbytereg(&pp.To, &pp.Tt)
   4147 
   4148 		z := int(p.From.Reg)
   4149 		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
   4150 			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
   4151 			// For now, different to keep bit-for-bit compatibility.
   4152 			if p.Mode == 32 {
   4153 				breg := byteswapreg(ctxt, &p.To)
   4154 				if breg != REG_AX {
   4155 					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
   4156 					asmando(ctxt, p, &p.From, reg[breg])
   4157 					subreg(&pp, z, breg)
   4158 					doasm(ctxt, &pp)
   4159 					ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
   4160 					asmando(ctxt, p, &p.From, reg[breg])
   4161 				} else {
   4162 					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
   4163 					subreg(&pp, z, REG_AX)
   4164 					doasm(ctxt, &pp)
   4165 					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
   4166 				}
   4167 				return
   4168 			}
   4169 
   4170 			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
   4171 				// We certainly don't want to exchange
   4172 				// with AX if the op is MUL or DIV.
   4173 				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
   4174 				asmando(ctxt, p, &p.From, reg[REG_BX])
   4175 				subreg(&pp, z, REG_BX)
   4176 				doasm(ctxt, &pp)
   4177 				ctxt.AsmBuf.Put1(0x87) // xchg lhs,bx
   4178 				asmando(ctxt, p, &p.From, reg[REG_BX])
   4179 			} else {
   4180 				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
   4181 				subreg(&pp, z, REG_AX)
   4182 				doasm(ctxt, &pp)
   4183 				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
   4184 			}
   4185 			return
   4186 		}
   4187 
   4188 		z = int(p.To.Reg)
   4189 		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
   4190 			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
   4191 			// For now, different to keep bit-for-bit compatibility.
   4192 			if p.Mode == 32 {
   4193 				breg := byteswapreg(ctxt, &p.From)
   4194 				if breg != REG_AX {
   4195 					ctxt.AsmBuf.Put1(0x87) //xchg rhs,bx
   4196 					asmando(ctxt, p, &p.To, reg[breg])
   4197 					subreg(&pp, z, breg)
   4198 					doasm(ctxt, &pp)
   4199 					ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
   4200 					asmando(ctxt, p, &p.To, reg[breg])
   4201 				} else {
   4202 					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
   4203 					subreg(&pp, z, REG_AX)
   4204 					doasm(ctxt, &pp)
   4205 					ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
   4206 				}
   4207 				return
   4208 			}
   4209 
   4210 			if isax(&p.From) {
   4211 				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
   4212 				asmando(ctxt, p, &p.To, reg[REG_BX])
   4213 				subreg(&pp, z, REG_BX)
   4214 				doasm(ctxt, &pp)
   4215 				ctxt.AsmBuf.Put1(0x87) // xchg rhs,bx
   4216 				asmando(ctxt, p, &p.To, reg[REG_BX])
   4217 			} else {
   4218 				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
   4219 				subreg(&pp, z, REG_AX)
   4220 				doasm(ctxt, &pp)
   4221 				ctxt.AsmBuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
   4222 			}
   4223 			return
   4224 		}
   4225 	}
   4226 
   4227 	ctxt.Diag("invalid instruction: %v", p)
   4228 	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
   4229 	return
   4230 }
   4231 
   4232 // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
   4233 // which is not referenced in a.
   4234 // If a is empty, it returns BX to account for MULB-like instructions
   4235 // that might use DX and AX.
   4236 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
   4237 	cand := 1
   4238 	canc := cand
   4239 	canb := canc
   4240 	cana := canb
   4241 
   4242 	if a.Type == obj.TYPE_NONE {
   4243 		cand = 0
   4244 		cana = cand
   4245 	}
   4246 
   4247 	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
   4248 		switch a.Reg {
   4249 		case REG_NONE:
   4250 			cand = 0
   4251 			cana = cand
   4252 
   4253 		case REG_AX, REG_AL, REG_AH:
   4254 			cana = 0
   4255 
   4256 		case REG_BX, REG_BL, REG_BH:
   4257 			canb = 0
   4258 
   4259 		case REG_CX, REG_CL, REG_CH:
   4260 			canc = 0
   4261 
   4262 		case REG_DX, REG_DL, REG_DH:
   4263 			cand = 0
   4264 		}
   4265 	}
   4266 
   4267 	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
   4268 		switch a.Index {
   4269 		case REG_AX:
   4270 			cana = 0
   4271 
   4272 		case REG_BX:
   4273 			canb = 0
   4274 
   4275 		case REG_CX:
   4276 			canc = 0
   4277 
   4278 		case REG_DX:
   4279 			cand = 0
   4280 		}
   4281 	}
   4282 
   4283 	if cana != 0 {
   4284 		return REG_AX
   4285 	}
   4286 	if canb != 0 {
   4287 		return REG_BX
   4288 	}
   4289 	if canc != 0 {
   4290 		return REG_CX
   4291 	}
   4292 	if cand != 0 {
   4293 		return REG_DX
   4294 	}
   4295 
   4296 	ctxt.Diag("impossible byte register")
   4297 	log.Fatalf("bad code")
   4298 	return 0
   4299 }
   4300 
   4301 func isbadbyte(a *obj.Addr) bool {
   4302 	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
   4303 }
   4304 
   4305 var naclret = []uint8{
   4306 	0x5e, // POPL SI
   4307 	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
   4308 	0x83,
   4309 	0xe6,
   4310 	0xe0, // ANDL $~31, SI
   4311 	0x4c,
   4312 	0x01,
   4313 	0xfe, // ADDQ R15, SI
   4314 	0xff,
   4315 	0xe6, // JMP SI
   4316 }
   4317 
   4318 var naclret8 = []uint8{
   4319 	0x5d, // POPL BP
   4320 	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
   4321 	0x83,
   4322 	0xe5,
   4323 	0xe0, // ANDL $~31, BP
   4324 	0xff,
   4325 	0xe5, // JMP BP
   4326 }
   4327 
   4328 var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
   4329 
   4330 var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
   4331 
   4332 var naclmovs = []uint8{
   4333 	0x89,
   4334 	0xf6, // MOVL SI, SI
   4335 	0x49,
   4336 	0x8d,
   4337 	0x34,
   4338 	0x37, // LEAQ (R15)(SI*1), SI
   4339 	0x89,
   4340 	0xff, // MOVL DI, DI
   4341 	0x49,
   4342 	0x8d,
   4343 	0x3c,
   4344 	0x3f, // LEAQ (R15)(DI*1), DI
   4345 }
   4346 
   4347 var naclstos = []uint8{
   4348 	0x89,
   4349 	0xff, // MOVL DI, DI
   4350 	0x49,
   4351 	0x8d,
   4352 	0x3c,
   4353 	0x3f, // LEAQ (R15)(DI*1), DI
   4354 }
   4355 
   4356 func nacltrunc(ctxt *obj.Link, reg int) {
   4357 	if reg >= REG_R8 {
   4358 		ctxt.AsmBuf.Put1(0x45)
   4359 	}
   4360 	reg = (reg - REG_AX) & 7
   4361 	ctxt.AsmBuf.Put2(0x89, byte(3<<6|reg<<3|reg))
   4362 }
   4363 
   4364 func asmins(ctxt *obj.Link, p *obj.Prog) {
   4365 	ctxt.AsmBuf.Reset()
   4366 	ctxt.Asmode = int(p.Mode)
   4367 
   4368 	if ctxt.Headtype == obj.Hnacl && p.Mode == 32 {
   4369 		switch p.As {
   4370 		case obj.ARET:
   4371 			ctxt.AsmBuf.Put(naclret8)
   4372 			return
   4373 
   4374 		case obj.ACALL,
   4375 			obj.AJMP:
   4376 			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
   4377 				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
   4378 			}
   4379 
   4380 		case AINT:
   4381 			ctxt.AsmBuf.Put1(0xf4)
   4382 			return
   4383 		}
   4384 	}
   4385 
   4386 	if ctxt.Headtype == obj.Hnacl && p.Mode == 64 {
   4387 		if p.As == AREP {
   4388 			ctxt.Rep++
   4389 			return
   4390 		}
   4391 
   4392 		if p.As == AREPN {
   4393 			ctxt.Repn++
   4394 			return
   4395 		}
   4396 
   4397 		if p.As == ALOCK {
   4398 			ctxt.Lock++
   4399 			return
   4400 		}
   4401 
   4402 		if p.As != ALEAQ && p.As != ALEAL {
   4403 			if p.From.Index != REG_NONE && p.From.Scale > 0 {
   4404 				nacltrunc(ctxt, int(p.From.Index))
   4405 			}
   4406 			if p.To.Index != REG_NONE && p.To.Scale > 0 {
   4407 				nacltrunc(ctxt, int(p.To.Index))
   4408 			}
   4409 		}
   4410 
   4411 		switch p.As {
   4412 		case obj.ARET:
   4413 			ctxt.AsmBuf.Put(naclret)
   4414 			return
   4415 
   4416 		case obj.ACALL,
   4417 			obj.AJMP:
   4418 			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
   4419 				// ANDL $~31, reg
   4420 				ctxt.AsmBuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
   4421 				// ADDQ R15, reg
   4422 				ctxt.AsmBuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
   4423 			}
   4424 
   4425 			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
   4426 				// ANDL $~31, reg
   4427 				ctxt.AsmBuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
   4428 				// ADDQ R15, reg
   4429 				ctxt.AsmBuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
   4430 			}
   4431 
   4432 		case AINT:
   4433 			ctxt.AsmBuf.Put1(0xf4)
   4434 			return
   4435 
   4436 		case ASCASB,
   4437 			ASCASW,
   4438 			ASCASL,
   4439 			ASCASQ,
   4440 			ASTOSB,
   4441 			ASTOSW,
   4442 			ASTOSL,
   4443 			ASTOSQ:
   4444 			ctxt.AsmBuf.Put(naclstos)
   4445 
   4446 		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
   4447 			ctxt.AsmBuf.Put(naclmovs)
   4448 		}
   4449 
   4450 		if ctxt.Rep != 0 {
   4451 			ctxt.AsmBuf.Put1(0xf3)
   4452 			ctxt.Rep = 0
   4453 		}
   4454 
   4455 		if ctxt.Repn != 0 {
   4456 			ctxt.AsmBuf.Put1(0xf2)
   4457 			ctxt.Repn = 0
   4458 		}
   4459 
   4460 		if ctxt.Lock != 0 {
   4461 			ctxt.AsmBuf.Put1(0xf0)
   4462 			ctxt.Lock = 0
   4463 		}
   4464 	}
   4465 
   4466 	ctxt.Rexflag = 0
   4467 	ctxt.Vexflag = 0
   4468 	mark := ctxt.AsmBuf.Len()
   4469 	ctxt.Asmode = int(p.Mode)
   4470 	doasm(ctxt, p)
   4471 	if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
   4472 		/*
   4473 		 * as befits the whole approach of the architecture,
   4474 		 * the rex prefix must appear before the first opcode byte
   4475 		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
   4476 		 * before the 0f opcode escape!), or it might be ignored.
   4477 		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
   4478 		 */
   4479 		if p.Mode != 64 {
   4480 			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", p.Mode, p, p.Ft, p.Tt)
   4481 		}
   4482 		n := ctxt.AsmBuf.Len()
   4483 		var np int
   4484 		for np = mark; np < n; np++ {
   4485 			c := ctxt.AsmBuf.Peek(np)
   4486 			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
   4487 				break
   4488 			}
   4489 		}
   4490 		ctxt.AsmBuf.Insert(np, byte(0x40|ctxt.Rexflag))
   4491 	}
   4492 
   4493 	n := ctxt.AsmBuf.Len()
   4494 	for i := len(ctxt.Cursym.R) - 1; i >= 0; i-- {
   4495 		r := &ctxt.Cursym.R[i]
   4496 		if int64(r.Off) < p.Pc {
   4497 			break
   4498 		}
   4499 		if ctxt.Rexflag != 0 {
   4500 			r.Off++
   4501 		}
   4502 		if r.Type == obj.R_PCREL {
   4503 			if p.Mode == 64 || p.As == obj.AJMP || p.As == obj.ACALL {
   4504 				// PC-relative addressing is relative to the end of the instruction,
   4505 				// but the relocations applied by the linker are relative to the end
   4506 				// of the relocation. Because immediate instruction
   4507 				// arguments can follow the PC-relative memory reference in the
   4508 				// instruction encoding, the two may not coincide. In this case,
   4509 				// adjust addend so that linker can keep relocating relative to the
   4510 				// end of the relocation.
   4511 				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
   4512 			} else if p.Mode == 32 {
   4513 				// On 386 PC-relative addressing (for non-call/jmp instructions)
   4514 				// assumes that the previous instruction loaded the PC of the end
   4515 				// of that instruction into CX, so the adjustment is relative to
   4516 				// that.
   4517 				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
   4518 			}
   4519 		}
   4520 		if r.Type == obj.R_GOTPCREL && p.Mode == 32 {
   4521 			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
   4522 			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
   4523 		}
   4524 
   4525 	}
   4526 
   4527 	if p.Mode == 64 && ctxt.Headtype == obj.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
   4528 		switch p.To.Reg {
   4529 		case REG_SP:
   4530 			ctxt.AsmBuf.Put(naclspfix)
   4531 		case REG_BP:
   4532 			ctxt.AsmBuf.Put(naclbpfix)
   4533 		}
   4534 	}
   4535 }
   4536