Home | History | Annotate | Download | only in x86
      1 // Inferno utils/6l/span.c
      2 // https://bitbucket.org/inferno-os/inferno-os/src/default/utils/6l/span.c
      3 //
      4 //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
      5 //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
      6 //	Portions Copyright © 1997-1999 Vita Nuova Limited
      7 //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
      8 //	Portions Copyright © 2004,2006 Bruce Ellis
      9 //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
     10 //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
     11 //	Portions Copyright © 2009 The Go Authors. All rights reserved.
     12 //
     13 // Permission is hereby granted, free of charge, to any person obtaining a copy
     14 // of this software and associated documentation files (the "Software"), to deal
     15 // in the Software without restriction, including without limitation the rights
     16 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     17 // copies of the Software, and to permit persons to whom the Software is
     18 // furnished to do so, subject to the following conditions:
     19 //
     20 // The above copyright notice and this permission notice shall be included in
     21 // all copies or substantial portions of the Software.
     22 //
     23 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     24 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     25 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
     26 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     27 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     28 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     29 // THE SOFTWARE.
     30 
     31 package x86
     32 
     33 import (
     34 	"cmd/internal/obj"
     35 	"cmd/internal/objabi"
     36 	"cmd/internal/sys"
     37 	"encoding/binary"
     38 	"fmt"
     39 	"log"
     40 	"strings"
     41 )
     42 
        // Package-level symbol handles. Nothing in this part of the file assigns
        // or reads them.
        // NOTE(review): presumably resolved once during initialization and used
        // when assembling Plan 9 TLS accesses and calls to deferreturn — confirm
        // at the use sites.
     43 var (
     44 	plan9privates *obj.LSym
     45 	deferreturn   *obj.LSym
     46 )
     47 
     48 // Instruction layout.
     49 
     50 const (
     51 	// Loop alignment constants:
     52 	// want to align loop entry to LoopAlign-byte boundary,
     53 	// and willing to insert at most MaxLoopPad bytes of NOP to do so.
     54 	// We define a loop entry as the target of a backward jump.
     55 	//
     56 	// gcc uses MaxLoopPad = 10 for its 'generic x86-64' config,
     57 	// and it aligns all jump targets, not just backward jump targets.
     58 	//
     59 	// As of 6/1/2012, the effect of setting MaxLoopPad = 10 here
     60 	// is very slight but negative, so the alignment is disabled by
     61 	// setting MaxLoopPad = 0. The code is here for reference and
     62 	// for future experiments.
     63 	//
     64 	LoopAlign  = 16 // boundary, in bytes, that loop entries would be aligned to
     65 	MaxLoopPad = 0  // upper bound on NOP padding per loop entry; 0 turns alignment off
     66 )
     67 
        // Optab is one entry of the instruction table that doasm searches by
        // opcode (see the doasm overview comment later in this file). Entries are
        // written as positional literals, e.g.
        //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{...}}
        // so the field order here must not change.
     68 type Optab struct {
     69 	as     obj.As // instruction (p.As) this entry encodes
     70 	ytab   []ytab // accepted operand patterns, scanned in order
     71 	prefix uint8 // one of the P* constants (e.g. Px, Pvex)
     72 	op     [23]uint8 // opcode ("magic") bytes, consumed row by row alongside ytab
     73 }
     74 
        // Movtab is the entry type of a second lookup table; the table itself and
        // the code that reads it are outside this part of the file.
        // NOTE(review): the field meanings below are inferred from the names
        // only — confirm against the code that consumes Movtab.
     75 type Movtab struct {
     76 	as   obj.As // instruction the entry applies to
     77 	ft   uint8 // presumably the Ytype of the "from" operand — TODO confirm
     78 	f3t  uint8 // presumably the Ytype of a third operand — TODO confirm
     79 	tt   uint8 // presumably the Ytype of the "to" operand — TODO confirm
     80 	code uint8 // presumably selects the encoding strategy — TODO confirm
     81 	op   [4]uint8 // opcode bytes for the entry
     82 }
     83 
        // Ytypes: operand classes. Per the doasm overview comment later in this
        // file, oclass computes the most specific Ytype of an operand and the
        // ycover table records which more general Ytypes that operand also
        // satisfies. The values come from iota, so the order of this list fixes
        // every numeric value; Ymax has to stay last because it sizes ycover.
     84 const (
     85 	Yxxx = iota
     86 	Ynone
     87 	Yi0 // $0
     88 	Yi1 // $1
     89 	Yu2 // $x, x fits in uint2
     90 	Yi8 // $x, x fits in int8
     91 	Yu8 // $x, x fits in uint8
     92 	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
     93 	Ys32
     94 	Yi32
     95 	Yi64
     96 	Yiauto
     97 	Yal
     98 	Ycl
     99 	Yax
    100 	Ycx
    101 	Yrb
    102 	Yrl
    103 	Yrl32 // Yrl on 32-bit system
    104 	Yrf
    105 	Yf0
    106 	Yrx
    107 	Ymb
    108 	Yml
    109 	Ym
    110 	Ybr
    111 	Ycs
    112 	Yss
    113 	Yds
    114 	Yes
    115 	Yfs
    116 	Ygs
    117 	Ygdtr
    118 	Yidtr
    119 	Yldtr
    120 	Ymsw
    121 	Ytask
    122 	Ycr0
    123 	Ycr1
    124 	Ycr2
    125 	Ycr3
    126 	Ycr4
    127 	Ycr5
    128 	Ycr6
    129 	Ycr7
    130 	Ycr8
    131 	Ydr0
    132 	Ydr1
    133 	Ydr2
    134 	Ydr3
    135 	Ydr4
    136 	Ydr5
    137 	Ydr6
    138 	Ydr7
    139 	Ytr0
    140 	Ytr1
    141 	Ytr2
    142 	Ytr3
    143 	Ytr4
    144 	Ytr5
    145 	Ytr6
    146 	Ytr7
    147 	Ymr
    148 	Ymm
    149 	Yxr
    150 	Yxm
    151 	Yxvm // VSIB vector array; vm32x/vm64x
    152 	Yyr
    153 	Yym
    154 	Yyvm // VSIB vector array; vm32y/vm64y
    155 	Ytls
    156 	Ytextsize
    157 	Yindir
    158 	Ymax // number of Ytypes; ycover is declared as [Ymax * Ymax]uint8
    159 )
    160 
        // Ztypes: encoding forms. Each ytab row names one of these; per the doasm
        // overview comment later in this file, the Ztype together with the bytes
        // selected from Optab.op determines the instruction bytes laid down.
        // The values come from iota, so the order of this list fixes them.
    161 const (
    162 	Zxxx = iota
    163 	Zlit
    164 	Zlitm_r
    165 	Z_rp
    166 	Zbr
    167 	Zcall
    168 	Zcallcon
    169 	Zcallduff
    170 	Zcallind
    171 	Zcallindreg
    172 	Zib_
    173 	Zib_rp
    174 	Zibo_m
    175 	Zibo_m_xm
    176 	Zil_
    177 	Zil_rp
    178 	Ziq_rp
    179 	Zilo_m
    180 	Zjmp
    181 	Zjmpcon
    182 	Zloop
    183 	Zo_iw
    184 	Zm_o
    185 	Zm_r
    186 	Zm2_r
    187 	Zm_r_xm
    188 	Zm_r_i_xm
    189 	Zm_r_xm_nr
    190 	Zr_m_xm_nr
    191 	Zibm_r /* mmx1,mmx2/mem64,imm8 */
    192 	Zibr_m
    193 	Zmb_r
    194 	Zaut_r
    195 	Zo_m
    196 	Zo_m64
    197 	Zpseudo
    198 	Zr_m
    199 	Zr_m_xm
    200 	Zrp_
    201 	Z_ib
    202 	Z_il
    203 	Zm_ibo
    204 	Zm_ilo
    205 	Zib_rr
    206 	Zil_rr
    207 	Zbyte
    208 	Zvex_rm_v_r
    209 	Zvex_rm_v_ro
    210 	Zvex_r_v_rm
    211 	Zvex_v_rm_r
    212 	Zvex_i_rm_r
    213 	Zvex_i_r_v
    214 	Zvex_i_rm_v_r
    215 	Zvex
    216 	Zvex_rm_r_vo
    217 	Zvex_i_r_rm
    218 	Zvex_hr_rm_v_r
    219 
    220 	Zmax // NOTE(review): presumably the number of Ztypes; keep it last
    221 )
    222 
        // Prefix selectors. These are the values that appear in the prefix field
        // of Optab entries (Px and Pvex in the examples elsewhere in this file).
        // The trailing comments give the byte sequence or mode each one stands
        // for; several are purely symbolic tags, as noted on the lines themselves.
    223 const (
    224 	Px   = 0
    225 	Px1  = 1    // symbolic; exact value doesn't matter
    226 	P32  = 0x32 /* 32-bit only */
    227 	Pe   = 0x66 /* operand escape */
    228 	Pm   = 0x0f /* 2byte opcode escape */
    229 	Pq   = 0xff /* both escapes: 66 0f */
    230 	Pb   = 0xfe /* byte operands */
    231 	Pf2  = 0xf2 /* xmm escape 1: f2 0f */
    232 	Pf3  = 0xf3 /* xmm escape 2: f3 0f */
    233 	Pef3 = 0xf5 /* xmm escape 2 with 16-bit prefix: 66 f3 0f */
    234 	Pq3  = 0x67 /* xmm escape 3: 66 48 0f */
    235 	Pq4  = 0x68 /* xmm escape 4: 66 0F 38 */
    236 	Pq4w = 0x69 /* Pq4 with Rex.w 66 0F 38 */
    237 	Pq5  = 0x6a /* xmm escape 5: F3 0F 38 */
    238 	Pq5w = 0x6b /* Pq5 with Rex.w F3 0F 38 */
    239 	Pfw  = 0xf4 /* Pf3 with Rex.w: f3 48 0f */
    240 	Pw   = 0x48 /* Rex.w */
    241 	Pw8  = 0x90 // symbolic; exact value doesn't matter
    242 	Py   = 0x80 /* defaults to 64-bit mode */
    243 	Py1  = 0x81 // symbolic; exact value doesn't matter
    244 	Py3  = 0x83 // symbolic; exact value doesn't matter
    245 	Pvex = 0x84 // symbolic: exact value doesn't matter
    246 
        	// W/R/X/B flag bits, occupying bits 3..0 respectively.
        	// NOTE(review): presumably the low four bits of the REX prefix byte
        	// (cf. the Rex.w references above) — confirm where they are OR-ed in.
    247 	Rxw = 1 << 3 /* =1, 64-bit operand size */
    248 	Rxr = 1 << 2 /* extend modrm reg */
    249 	Rxx = 1 << 1 /* extend sib index */
    250 	Rxb = 1 << 0 /* extend modrm r/m, sib base, or opcode reg */
    251 )
    252 
        // VEX descriptor bytes. Every VEX_* combination below packs its P, L, M and
        // W choices into a single small value (P in bits 0-1, L in bit 2, M in
        // bits 3-4, W in bit 7) so that it can be stored in an Optab.op array
        // immediately before the opcode byte, as in the AVPXOR example later in
        // this file. The V-field names (and vexLZ/vexLIG/vex128, vexWIG/vexW0)
        // are all zero; they exist so the names read like the manual's notation.
    253 const (
    254 	// Encoding for VEX prefix in tables.
    255 	// The P, L, and W fields are chosen to match
    256 	// their eventual locations in the VEX prefix bytes.
    257 
    258 	// V field - 4 bits; ignored by encoder
    259 	vexNOVSR = 0 // No VEX-SPECIFIED-REGISTER
    260 	vexNDS   = 0
    261 	vexNDD   = 0
    262 	vexDDS   = 0
    263 	// P field - 2 bits
    264 	vex66 = 1 << 0
    265 	vexF3 = 2 << 0
    266 	vexF2 = 3 << 0
    267 	// L field - 1 bit
    268 	vexLZ  = 0 << 2
    269 	vexLIG = 0 << 2
    270 	vex128 = 0 << 2
    271 	vex256 = 1 << 2
    272 	// W field - 1 bit
    273 	vexWIG = 0 << 7
    274 	vexW0  = 0 << 7
    275 	vexW1  = 1 << 7
    276 	// M field - 5 bits, but mostly reserved; we can store up to 4
    277 	vex0F   = 1 << 3
    278 	vex0F38 = 2 << 3
    279 	vex0F3A = 3 << 3
    280 
    281 	// Combinations used in the manual.
    282 	VEX_DDS_LIG_66_0F38_W1    = vexDDS | vexLIG | vex66 | vex0F38 | vexW1
    283 	VEX_NDD_128_66_0F_WIG     = vexNDD | vex128 | vex66 | vex0F | vexWIG
    284 	VEX_NDD_256_66_0F_WIG     = vexNDD | vex256 | vex66 | vex0F | vexWIG
    285 	VEX_NDD_LZ_F2_0F38_W0     = vexNDD | vexLZ | vexF2 | vex0F38 | vexW0
    286 	VEX_NDD_LZ_F2_0F38_W1     = vexNDD | vexLZ | vexF2 | vex0F38 | vexW1
    287 	VEX_NDS_128_66_0F_WIG     = vexNDS | vex128 | vex66 | vex0F | vexWIG
    288 	VEX_NDS_128_66_0F38_WIG   = vexNDS | vex128 | vex66 | vex0F38 | vexWIG
    289 	VEX_NDS_128_F2_0F_WIG     = vexNDS | vex128 | vexF2 | vex0F | vexWIG
    290 	VEX_NDS_256_66_0F_WIG     = vexNDS | vex256 | vex66 | vex0F | vexWIG
    291 	VEX_NDS_256_66_0F38_WIG   = vexNDS | vex256 | vex66 | vex0F38 | vexWIG
    292 	VEX_NDS_256_66_0F3A_W0    = vexNDS | vex256 | vex66 | vex0F3A | vexW0
    293 	VEX_NDS_256_66_0F3A_WIG   = vexNDS | vex256 | vex66 | vex0F3A | vexWIG
    294 	VEX_NDS_LZ_0F38_W0        = vexNDS | vexLZ | vex0F38 | vexW0
    295 	VEX_NDS_LZ_0F38_W1        = vexNDS | vexLZ | vex0F38 | vexW1
    296 	VEX_NDS_LZ_66_0F38_W0     = vexNDS | vexLZ | vex66 | vex0F38 | vexW0
    297 	VEX_NDS_LZ_66_0F38_W1     = vexNDS | vexLZ | vex66 | vex0F38 | vexW1
    298 	VEX_NDS_LZ_F2_0F38_W0     = vexNDS | vexLZ | vexF2 | vex0F38 | vexW0
    299 	VEX_NDS_LZ_F2_0F38_W1     = vexNDS | vexLZ | vexF2 | vex0F38 | vexW1
    300 	VEX_NDS_LZ_F3_0F38_W0     = vexNDS | vexLZ | vexF3 | vex0F38 | vexW0
    301 	VEX_NDS_LZ_F3_0F38_W1     = vexNDS | vexLZ | vexF3 | vex0F38 | vexW1
    302 	VEX_NOVSR_128_66_0F_WIG   = vexNOVSR | vex128 | vex66 | vex0F | vexWIG
    303 	VEX_NOVSR_128_66_0F38_W0  = vexNOVSR | vex128 | vex66 | vex0F38 | vexW0
    304 	VEX_NOVSR_128_66_0F38_WIG = vexNOVSR | vex128 | vex66 | vex0F38 | vexWIG
    305 	VEX_NOVSR_128_F2_0F_WIG   = vexNOVSR | vex128 | vexF2 | vex0F | vexWIG
    306 	VEX_NOVSR_128_F3_0F_WIG   = vexNOVSR | vex128 | vexF3 | vex0F | vexWIG
    307 	VEX_NOVSR_256_66_0F_WIG   = vexNOVSR | vex256 | vex66 | vex0F | vexWIG
    308 	VEX_NOVSR_256_66_0F38_W0  = vexNOVSR | vex256 | vex66 | vex0F38 | vexW0
    309 	VEX_NOVSR_256_66_0F38_WIG = vexNOVSR | vex256 | vex66 | vex0F38 | vexWIG
    310 	VEX_NOVSR_256_F2_0F_WIG   = vexNOVSR | vex256 | vexF2 | vex0F | vexWIG
    311 	VEX_NOVSR_256_F3_0F_WIG   = vexNOVSR | vex256 | vexF3 | vex0F | vexWIG
    312 	VEX_NOVSR_LZ_F2_0F3A_W0   = vexNOVSR | vexLZ | vexF2 | vex0F3A | vexW0
    313 	VEX_NOVSR_LZ_F2_0F3A_W1   = vexNOVSR | vexLZ | vexF2 | vex0F3A | vexW1
    314 )
    315 
        // ycover is a Ymax-by-Ymax matrix stored flat: per the doasm overview
        // comment later in this file, ycover[a*Ymax+b] = 1 means an operand of
        // class a also counts as class b. That comment says it is set up in
        // instinit.
    316 var ycover [Ymax * Ymax]uint8
    317 
        // reg and regrex are tables sized by MAXREG (regrex has one extra slot).
        // NOTE(review): presumably indexed by register number and holding the
        // register's encoding and its REX extension bits — confirm; nothing in
        // this part of the file fills or reads them.
    318 var reg [MAXREG]int
    319 
    320 var regrex [MAXREG + 1]int
    321 
        // Operand-pattern tables ("ytabs"). Each row is
        //	{Ztype, zoffset, argList{operand Ytypes...}}
        // Per the doasm overview comment later in this file, the rows of an
        // instruction's table are scanned in order for one whose Ytypes match the
        // operands; each row skipped advances the cursor into Optab.op by that
        // row's numeric field, and the matching row's Ztype decides how the bytes
        // at the cursor are laid down. (That comment's example uses an older row
        // layout; the field roles given here are read off from it.)
        //
        // The five tables in this group contain only Zlit and Zpseudo rows.
        //
        // ynone: a single row with an empty operand list.
    322 var ynone = []ytab{
    323 	{Zlit, 1, argList{}},
    324 }
    325 
        // ytext: two- and three-operand forms, both ending in Ytextsize.
    326 var ytext = []ytab{
    327 	{Zpseudo, 0, argList{Ymb, Ytextsize}},
    328 	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
    329 }
    330 
        // ynop: zero or one operand.
        // NOTE(review): rows 6-9 repeat the operand lists of rows 2-5. If the
        // scan stops at the first matching row the repeats are never selected —
        // confirm against the matching loop before pruning anything.
    331 var ynop = []ytab{
    332 	{Zpseudo, 0, argList{}},
    333 	{Zpseudo, 0, argList{Yiauto}},
    334 	{Zpseudo, 0, argList{Yml}},
    335 	{Zpseudo, 0, argList{Yrf}},
    336 	{Zpseudo, 0, argList{Yxr}},
    337 	{Zpseudo, 0, argList{Yiauto}},
    338 	{Zpseudo, 0, argList{Yml}},
    339 	{Zpseudo, 0, argList{Yrf}},
    340 	{Zpseudo, 1, argList{Yxr}},
    341 }
    342 
    343 var yfuncdata = []ytab{
    344 	{Zpseudo, 0, argList{Yi32, Ym}},
    345 }
    346 
    347 var ypcdata = []ytab{
    348 	{Zpseudo, 0, argList{Yi32, Yi32}},
    349 }
    350 
        // Two-operand (and, for yincl/yincq, one-operand) integer tables.
        // Rows are {Ztype, zoffset, argList{...}} and are tried in order, so the
        // narrower immediate forms listed first (e.g. Yi8 before Yi32 in yaddl)
        // take precedence. zoffset is the number of Optab.op bytes belonging to
        // the row; the AADDL walk-through in the doasm overview comment later in
        // this file shows how yaddl's 2,1,2,1,1 line up with its opcode bytes.
        // Row order must therefore stay in step with every Optab entry that
        // uses the table.
    351 var yxorb = []ytab{
    352 	{Zib_, 1, argList{Yi32, Yal}},
    353 	{Zibo_m, 2, argList{Yi32, Ymb}},
    354 	{Zr_m, 1, argList{Yrb, Ymb}},
    355 	{Zm_r, 1, argList{Ymb, Yrb}},
    356 }
    357 
    358 var yaddl = []ytab{
    359 	{Zibo_m, 2, argList{Yi8, Yml}},
    360 	{Zil_, 1, argList{Yi32, Yax}},
    361 	{Zilo_m, 2, argList{Yi32, Yml}},
    362 	{Zr_m, 1, argList{Yrl, Yml}},
    363 	{Zm_r, 1, argList{Yml, Yrl}},
    364 }
    365 
    366 var yincl = []ytab{
    367 	{Z_rp, 1, argList{Yrl}},
    368 	{Zo_m, 2, argList{Yml}},
    369 }
    370 
        // yincq is yincl without the Z_rp row.
    371 var yincq = []ytab{
    372 	{Zo_m, 2, argList{Yml}},
    373 }
    374 
        // ycmpb and ycmpl list the same shapes as yxorb and yaddl with the
        // operand order reversed (immediate last).
    375 var ycmpb = []ytab{
    376 	{Z_ib, 1, argList{Yal, Yi32}},
    377 	{Zm_ibo, 2, argList{Ymb, Yi32}},
    378 	{Zm_r, 1, argList{Ymb, Yrb}},
    379 	{Zr_m, 1, argList{Yrb, Ymb}},
    380 }
    381 
    382 var ycmpl = []ytab{
    383 	{Zm_ibo, 2, argList{Yml, Yi8}},
    384 	{Z_il, 1, argList{Yax, Yi32}},
    385 	{Zm_ilo, 2, argList{Yml, Yi32}},
    386 	{Zm_r, 1, argList{Yml, Yrl}},
    387 	{Zr_m, 1, argList{Yrl, Yml}},
    388 }
    389 
        // yshb/yshl: the first operand is the constant 1 (Yi1), a uint8
        // immediate (Yu8), or a Ycx/Ycl operand. yshb has no Ycl row.
    390 var yshb = []ytab{
    391 	{Zo_m, 2, argList{Yi1, Ymb}},
    392 	{Zibo_m, 2, argList{Yu8, Ymb}},
    393 	{Zo_m, 2, argList{Ycx, Ymb}},
    394 }
    395 
    396 var yshl = []ytab{
    397 	{Zo_m, 2, argList{Yi1, Yml}},
    398 	{Zibo_m, 2, argList{Yu8, Yml}},
    399 	{Zo_m, 2, argList{Ycl, Yml}},
    400 	{Zo_m, 2, argList{Ycx, Yml}},
    401 }
    402 
        // ytestl is yaddl without the leading Yi8 row.
    403 var ytestl = []ytab{
    404 	{Zil_, 1, argList{Yi32, Yax}},
    405 	{Zilo_m, 2, argList{Yi32, Yml}},
    406 	{Zr_m, 1, argList{Yrl, Yml}},
    407 	{Zm_r, 1, argList{Yml, Yrl}},
    408 }
    409 
        // Move-style tables (plus ybtl and yret, which sit between them).
        // Rows are {Ztype, zoffset, argList{...}}, tried in order; zoffset is the
        // number of Optab.op bytes belonging to the row (see the doasm overview
        // comment later in this file), so row order must match the opcode bytes
        // of every Optab entry that uses the table.
    410 var ymovb = []ytab{
    411 	{Zr_m, 1, argList{Yrb, Ymb}},
    412 	{Zm_r, 1, argList{Ymb, Yrb}},
    413 	{Zib_rp, 1, argList{Yi32, Yrb}},
    414 	{Zibo_m, 2, argList{Yi32, Ymb}},
    415 }
    416 
    417 var ybtl = []ytab{
    418 	{Zibo_m, 2, argList{Yi8, Yml}},
    419 	{Zr_m, 1, argList{Yrl, Yml}},
    420 }
    421 
    422 var ymovw = []ytab{
    423 	{Zr_m, 1, argList{Yrl, Yml}},
    424 	{Zm_r, 1, argList{Yml, Yrl}},
    425 	{Zil_rp, 1, argList{Yi32, Yrl}},
    426 	{Zilo_m, 2, argList{Yi32, Yml}},
    427 	{Zaut_r, 2, argList{Yiauto, Yrl}},
    428 }
    429 
        // ymovl is ymovw with four MMX/XMM MOVD rows inserted before the final
        // Zaut_r row.
    430 var ymovl = []ytab{
    431 	{Zr_m, 1, argList{Yrl, Yml}},
    432 	{Zm_r, 1, argList{Yml, Yrl}},
    433 	{Zil_rp, 1, argList{Yi32, Yrl}},
    434 	{Zilo_m, 2, argList{Yi32, Yml}},
    435 	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
    436 	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
    437 	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
    438 	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
    439 	{Zaut_r, 2, argList{Yiauto, Yrl}},
    440 }
    441 
        // yret: no operand, or a single Yi32 operand; both rows use Zo_iw.
    442 var yret = []ytab{
    443 	{Zo_iw, 1, argList{}},
    444 	{Zo_iw, 1, argList{Yi32}},
    445 }
    446 
        // ymovq: the trailing comment on each row names the prefix/opcode bytes
        // the row expects to find in the owning Optab entry.
    447 var ymovq = []ytab{
    448 	// valid in 32-bit mode
    449 	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
    450 	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
    451 	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
    452 	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
    453 	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
    454 
    455 	// valid only in 64-bit mode, usually with 64-bit prefix
    456 	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
    457 	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
    458 	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
    459 	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
    460 	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
    461 	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
    462 	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
    463 	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
    464 	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
    465 	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
    466 }
    467 
        // Small fixed-shape tables. Each is named after its operand pair and,
        // apart from yml_mb and yxchg, holds a single {Ztype, zoffset, argList}
        // row that consumes one Optab.op byte.
    468 var ym_rl = []ytab{
    469 	{Zm_r, 1, argList{Ym, Yrl}},
    470 }
    471 
    472 var yrl_m = []ytab{
    473 	{Zr_m, 1, argList{Yrl, Ym}},
    474 }
    475 
    476 var ymb_rl = []ytab{
    477 	{Zmb_r, 1, argList{Ymb, Yrl}},
    478 }
    479 
    480 var yml_rl = []ytab{
    481 	{Zm_r, 1, argList{Yml, Yrl}},
    482 }
    483 
    484 var yrl_ml = []ytab{
    485 	{Zr_m, 1, argList{Yrl, Yml}},
    486 }
    487 
        // yml_mb accepts both operand orders of (Yrb, Ymb); yrb_mb only the first.
    488 var yml_mb = []ytab{
    489 	{Zr_m, 1, argList{Yrb, Ymb}},
    490 	{Zm_r, 1, argList{Ymb, Yrb}},
    491 }
    492 
    493 var yrb_mb = []ytab{
    494 	{Zr_m, 1, argList{Yrb, Ymb}},
    495 }
    496 
        // yxchg: the two rows involving Yax come first, so they are tried before
        // the general Yrl/Yml rows.
    497 var yxchg = []ytab{
    498 	{Z_rp, 1, argList{Yax, Yrl}},
    499 	{Zrp_, 1, argList{Yrl, Yax}},
    500 	{Zr_m, 1, argList{Yrl, Yml}},
    501 	{Zm_r, 1, argList{Yml, Yrl}},
    502 }
    503 
        // Miscellaneous integer tables. Rows are {Ztype, zoffset, argList{...}},
        // tried in order; zoffset is the number of Optab.op bytes belonging to
        // the row (see the doasm overview comment later in this file).
    504 var ydivl = []ytab{
    505 	{Zm_o, 2, argList{Yml}},
    506 }
    507 
    508 var ydivb = []ytab{
    509 	{Zm_o, 2, argList{Ymb}},
    510 }
    511 
        // yimul: one-operand form, two immediate forms (Yi8 tried before Yi32),
        // and a two-operand Yml, Yrl form.
    512 var yimul = []ytab{
    513 	{Zm_o, 2, argList{Yml}},
    514 	{Zib_rr, 1, argList{Yi8, Yrl}},
    515 	{Zil_rr, 1, argList{Yi32, Yrl}},
    516 	{Zm_r, 2, argList{Yml, Yrl}},
    517 }
    518 
    519 var yimul3 = []ytab{
    520 	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
    521 }
    522 
    523 var ybyte = []ytab{
    524 	{Zbyte, 1, argList{Yi64}},
    525 }
    526 
        // yin: with an immediate operand, or with no operand at all.
    527 var yin = []ytab{
    528 	{Zib_, 1, argList{Yi32}},
    529 	{Zlit, 1, argList{}},
    530 }
    531 
    532 var yint = []ytab{
    533 	{Zib_, 1, argList{Yi32}},
    534 }
    535 
    536 var ypushl = []ytab{
    537 	{Zrp_, 1, argList{Yrl}},
    538 	{Zm_o, 2, argList{Ym}},
    539 	{Zib_, 1, argList{Yi8}},
    540 	{Zil_, 1, argList{Yi32}},
    541 }
    542 
    543 var ypopl = []ytab{
    544 	{Z_rp, 1, argList{Yrl}},
    545 	{Zo_m, 2, argList{Ym}},
    546 }
    547 
    548 var yclflush = []ytab{
    549 	{Zo_m, 2, argList{Ym}},
    550 }
    551 
    552 var ybswap = []ytab{
    553 	{Z_rp, 2, argList{Yrl}},
    554 }
    555 
    556 var yscond = []ytab{
    557 	{Zo_m, 2, argList{Ymb}},
    558 }
    559 
        // Control-transfer tables: every table here has at least one row whose
        // operand is Ybr. Rows are {Ztype, zoffset, argList{...}}, tried in order
        // (see the doasm overview comment later in this file).
        //
        // yjcond: a Ybr target, optionally preceded by the constant 0 or 1.
        // NOTE(review): the leading $0/$1 is presumably a branch-likelihood
        // hint — confirm in the Zbr encoder.
    560 var yjcond = []ytab{
    561 	{Zbr, 0, argList{Ybr}},
    562 	{Zbr, 0, argList{Yi0, Ybr}},
    563 	{Zbr, 1, argList{Yi1, Ybr}},
    564 }
    565 
    566 var yloop = []ytab{
    567 	{Zloop, 1, argList{Ybr}},
    568 }
    569 
        // ycall: three indirect forms (Yml; Yrx, Yrx; Yindir), then a Ybr target,
        // then a Yi32 constant.
    570 var ycall = []ytab{
    571 	{Zcallindreg, 0, argList{Yml}},
    572 	{Zcallindreg, 2, argList{Yrx, Yrx}},
    573 	{Zcallind, 2, argList{Yindir}},
    574 	{Zcall, 0, argList{Ybr}},
    575 	{Zcallcon, 1, argList{Yi32}},
    576 }
    577 
    578 var yduff = []ytab{
    579 	{Zcallduff, 1, argList{Yi32}},
    580 }
    581 
    582 var yjmp = []ytab{
    583 	{Zo_m64, 2, argList{Yml}},
    584 	{Zjmp, 0, argList{Ybr}},
    585 	{Zjmpcon, 1, argList{Yi32}},
    586 }
    587 
        // Tables built from the Yf0, Yrf and Ym operand classes.
        // NOTE(review): presumably the x87 floating-point instructions, with Yf0
        // the top-of-stack register and Yrf any stack register — confirm in
        // oclass. Every row is {Ztype, zoffset, argList{...}} with zoffset 2,
        // except the Zlit row in ystsw; rows are tried in order.
    588 var yfmvd = []ytab{
    589 	{Zm_o, 2, argList{Ym, Yf0}},
    590 	{Zo_m, 2, argList{Yf0, Ym}},
    591 	{Zm_o, 2, argList{Yrf, Yf0}},
    592 	{Zo_m, 2, argList{Yf0, Yrf}},
    593 }
    594 
    595 var yfmvdp = []ytab{
    596 	{Zo_m, 2, argList{Yf0, Ym}},
    597 	{Zo_m, 2, argList{Yf0, Yrf}},
    598 }
    599 
    600 var yfmvf = []ytab{
    601 	{Zm_o, 2, argList{Ym, Yf0}},
    602 	{Zo_m, 2, argList{Yf0, Ym}},
    603 }
    604 
    605 var yfmvx = []ytab{
    606 	{Zm_o, 2, argList{Ym, Yf0}},
    607 }
    608 
    609 var yfmvp = []ytab{
    610 	{Zo_m, 2, argList{Yf0, Ym}},
    611 }
    612 
    613 var yfcmv = []ytab{
    614 	{Zm_o, 2, argList{Yrf, Yf0}},
    615 }
    616 
    617 var yfadd = []ytab{
    618 	{Zm_o, 2, argList{Ym, Yf0}},
    619 	{Zm_o, 2, argList{Yrf, Yf0}},
    620 	{Zo_m, 2, argList{Yf0, Yrf}},
    621 }
    622 
    623 var yfxch = []ytab{
    624 	{Zo_m, 2, argList{Yf0, Yrf}},
    625 	{Zm_o, 2, argList{Yrf, Yf0}},
    626 }
    627 
    628 var ycompp = []ytab{
    629 	{Zo_m, 2, argList{Yf0, Yrf}}, /* botch is really f0,f1 */
    630 }
    631 
        // ystsw: a Ym operand, or the Yax operand via a Zlit row.
    632 var ystsw = []ytab{
    633 	{Zo_m, 2, argList{Ym}},
    634 	{Zlit, 1, argList{Yax}},
    635 }
    636 
        // ysvrs_mo and ysvrs_om take the same single Ym operand and differ only
        // in Ztype (Zm_o versus Zo_m).
    637 var ysvrs_mo = []ytab{
    638 	{Zm_o, 2, argList{Ym}},
    639 }
    640 
    641 // unaryDst version of "ysvrs_mo".
    642 var ysvrs_om = []ytab{
    643 	{Zo_m, 2, argList{Ym}},
    644 }
    645 
        // Tables built from the Ymr/Ymm and Yxr/Yxm operand classes.
        // NOTE(review): presumably MMX and XMM register / register-or-memory
        // operands respectively (cf. the "MMX MOVD" / "XMM MOVD" row comments in
        // ymovl) — confirm in oclass.
        // Rows are {Ztype, zoffset, argList{...}}, tried in order; zoffset is the
        // number of Optab.op bytes belonging to the row (see the doasm overview
        // comment later in this file). In tables that list both an Ymr and an
        // Yxr form, the two rows select different opcode bytes of the same Optab
        // entry, so their order matters.
    646 var ymm = []ytab{
    647 	{Zm_r_xm, 1, argList{Ymm, Ymr}},
    648 	{Zm_r_xm, 2, argList{Yxm, Yxr}},
    649 }
    650 
    651 var yxm = []ytab{
    652 	{Zm_r_xm, 1, argList{Yxm, Yxr}},
    653 }
    654 
        // yxm_q4 has the same operands as yxm but uses Zm_r instead of Zm_r_xm.
    655 var yxm_q4 = []ytab{
    656 	{Zm_r, 1, argList{Yxm, Yxr}},
    657 }
    658 
    659 var yxcvm1 = []ytab{
    660 	{Zm_r_xm, 2, argList{Yxm, Yxr}},
    661 	{Zm_r_xm, 2, argList{Yxm, Ymr}},
    662 }
    663 
    664 var yxcvm2 = []ytab{
    665 	{Zm_r_xm, 2, argList{Yxm, Yxr}},
    666 	{Zm_r_xm, 2, argList{Ymm, Yxr}},
    667 }
    668 
    669 var yxr = []ytab{
    670 	{Zm_r_xm, 1, argList{Yxr, Yxr}},
    671 }
    672 
    673 var yxr_ml = []ytab{
    674 	{Zr_m_xm, 1, argList{Yxr, Yml}},
    675 }
    676 
    677 var ymr = []ytab{
    678 	{Zm_r, 1, argList{Ymr, Ymr}},
    679 }
    680 
    681 var ymr_ml = []ytab{
    682 	{Zr_m_xm, 1, argList{Ymr, Yml}},
    683 }
    684 
    685 var yxcmpi = []ytab{
    686 	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
    687 }
    688 
    689 var yxmov = []ytab{
    690 	{Zm_r_xm, 1, argList{Yxm, Yxr}},
    691 	{Zr_m_xm, 1, argList{Yxr, Yxm}},
    692 }
    693 
    694 var yxcvfl = []ytab{
    695 	{Zm_r_xm, 1, argList{Yxm, Yrl}},
    696 }
    697 
    698 var yxcvlf = []ytab{
    699 	{Zm_r_xm, 1, argList{Yml, Yxr}},
    700 }
    701 
        // yxcvfq/yxcvqf match the operand lists of yxcvfl/yxcvlf but consume two
        // Optab.op bytes per row instead of one.
    702 var yxcvfq = []ytab{
    703 	{Zm_r_xm, 2, argList{Yxm, Yrl}},
    704 }
    705 
    706 var yxcvqf = []ytab{
    707 	{Zm_r_xm, 2, argList{Yml, Yxr}},
    708 }
    709 
        // yps: for each of the Ymr and Yxr destinations, a register-or-memory
        // source row followed by a Yi8 immediate row.
    710 var yps = []ytab{
    711 	{Zm_r_xm, 1, argList{Ymm, Ymr}},
    712 	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
    713 	{Zm_r_xm, 2, argList{Yxm, Yxr}},
    714 	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
    715 }
    716 
    717 var yxrrl = []ytab{
    718 	{Zm_r, 1, argList{Yxr, Yrl}},
    719 }
    720 
    721 var ymrxr = []ytab{
    722 	{Zm_r, 1, argList{Ymr, Yxr}},
    723 	{Zm_r_xm, 1, argList{Yxm, Yxr}},
    724 }
    725 
    726 var ymshuf = []ytab{
    727 	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
    728 }
    729 
    730 var ymshufb = []ytab{
    731 	{Zm2_r, 2, argList{Yxm, Yxr}},
    732 }
    733 
    734 // It should never have more than 1 entry,
    735 // because some optab entries use opcode sequences that
    736 // are longer than 2 bytes (zoffset=2 here),
    737 // ROUNDPD and ROUNDPS and recently added BLENDPD,
    738 // to name a few.
    739 var yxshuf = []ytab{
    740 	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
    741 }
    742 
        // The remaining tables in this group are mostly single-row; rows are
        // {Ztype, zoffset, argList{...}} and are tried in order.
    743 var yextrw = []ytab{
    744 	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
    745 }
    746 
    747 var yextr = []ytab{
    748 	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
    749 }
    750 
    751 var yinsrw = []ytab{
    752 	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
    753 }
    754 
    755 var yinsr = []ytab{
    756 	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
    757 }
    758 
    759 var ypsdq = []ytab{
    760 	{Zibo_m, 2, argList{Yi8, Yxr}},
    761 }
    762 
        // ymskb: the Yxr source row comes before the Ymr source row.
    763 var ymskb = []ytab{
    764 	{Zm_r_xm, 2, argList{Yxr, Yrl}},
    765 	{Zm_r_xm, 1, argList{Ymr, Yrl}},
    766 }
    767 
    768 var ycrc32l = []ytab{
    769 	{Zlitm_r, 0, argList{Yml, Yrl}},
    770 }
    771 
    772 var yprefetch = []ytab{
    773 	{Zm_o, 2, argList{Ym}},
    774 }
    775 
    776 var yaes = []ytab{
    777 	{Zlitm_r, 2, argList{Yxm, Yxr}},
    778 }
    779 
        // yxbegin reuses the Zjmp form with a Ybr target.
    780 var yxbegin = []ytab{
    781 	{Zjmp, 1, argList{Ybr}},
    782 }
    783 
    784 var yxabort = []ytab{
    785 	{Zib_, 1, argList{Yu8}},
    786 }
    787 
    788 var ylddqu = []ytab{
    789 	{Zm_r, 1, argList{Ym, Yxr}},
    790 }
    791 
        // ypalignr has the same single row as yxshuf.
    792 var ypalignr = []ytab{
    793 	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
    794 }
    795 
    796 // VEX instructions that come in two forms:
    797 //	VTHING xmm2/m128, xmmV, xmm1
    798 //	VTHING ymm2/m256, ymmV, ymm1
    799 // The opcode array in the corresponding Optab entry
    800 // should contain the (VEX prefixes, opcode byte) pair
    801 // for each of the two forms.
    802 // For example, the entries for VPXOR are:
    803 //
    804 //	VPXOR xmm2/m128, xmmV, xmm1
    805 //	VEX.NDS.128.66.0F.WIG EF /r
    806 //
    807 //	VPXOR ymm2/m256, ymmV, ymm1
    808 //	VEX.NDS.256.66.0F.WIG EF /r
    809 //
    810 // Produce this Optab entry:
    811 //
    812 //	{AVPXOR, yvex_xy3, Pvex, [23]uint8{VEX_NDS_128_66_0F_WIG, 0xEF, VEX_NDS_256_66_0F_WIG, 0xEF}}
    813 //
    814 var yvex_xy3 = []ytab{
    815 	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
    816 	{Zvex_rm_v_r, 2, argList{Yym, Yyr, Yyr}},
    817 }
    818 
        // The VEX tables that follow use the same convention: one row per
        // (VEX descriptor, opcode) pair in the Optab entry, in the same order.
        // Most rows have zoffset 2, i.e. exactly one such pair; the Zvex_i_r_v
        // rows have zoffset 3.
        //
        // yvex_x3 is the Yxm/Yxr row of yvex_xy3 on its own.
    819 var yvex_x3 = []ytab{
    820 	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
    821 }
    822 
    823 var yvex_ri3 = []ytab{
    824 	{Zvex_i_rm_r, 2, argList{Yi8, Ymb, Yrl}},
    825 }
    826 
        // yvex_xyi3 lists each of the Yxm/Yxr and Yym/Yyr forms twice, once with
        // a Yu8 immediate and once with a Yi8 immediate.
    827 var yvex_xyi3 = []ytab{
    828 	{Zvex_i_rm_r, 2, argList{Yu8, Yxm, Yxr}},
    829 	{Zvex_i_rm_r, 2, argList{Yu8, Yym, Yyr}},
    830 	{Zvex_i_rm_r, 2, argList{Yi8, Yxm, Yxr}},
    831 	{Zvex_i_rm_r, 2, argList{Yi8, Yym, Yyr}},
    832 }
    833 
    834 var yvex_yyi4 = []ytab{
    835 	{Zvex_i_rm_v_r, 2, argList{Yu8, Yym, Yyr, Yyr}},
    836 }
    837 
    838 var yvex_xyi4 = []ytab{
    839 	{Zvex_i_rm_v_r, 2, argList{Yu8, Yxm, Yyr, Yyr}},
    840 }
    841 
        // yvex_shift: the two immediate-count rows (zoffset 3) come first,
        // followed by two rows whose first operand is Yxm for both the Yxr and
        // the Yyr form.
    842 var yvex_shift = []ytab{
    843 	{Zvex_i_r_v, 3, argList{Yi8, Yxr, Yxr}},
    844 	{Zvex_i_r_v, 3, argList{Yi8, Yyr, Yyr}},
    845 	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
    846 	{Zvex_rm_v_r, 2, argList{Yxm, Yyr, Yyr}},
    847 }
    848 
        // yvex_shift_dq keeps only the immediate-count rows of yvex_shift.
    849 var yvex_shift_dq = []ytab{
    850 	{Zvex_i_r_v, 3, argList{Yi8, Yxr, Yxr}},
    851 	{Zvex_i_r_v, 3, argList{Yi8, Yyr, Yyr}},
    852 }
    853 
        // VEX operand-pattern tables, continued. Rows are
        //	{Ztype, zoffset, argList{operand Ytypes...}}
        // and are tried in order. A zoffset of 2 corresponds to one
        // (VEX descriptor, opcode byte) pair in the owning Optab.op array, as in
        // the AVPXOR example earlier in this file, so rows must stay in the same
        // order as those pairs.
    854 var yvex_r3 = []ytab{
    855 	{Zvex_rm_v_r, 2, argList{Yml, Yrl, Yrl}},
    856 }
    857 
        // yvex_vmr3 has the same operand classes as yvex_r3 with the first two
        // swapped, and uses Zvex_v_rm_r.
    858 var yvex_vmr3 = []ytab{
    859 	{Zvex_v_rm_r, 2, argList{Yrl, Yml, Yrl}},
    860 }
    861 
    862 var yvex_xy2 = []ytab{
    863 	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
    864 	{Zvex_rm_v_r, 2, argList{Yym, Yyr}},
    865 }
    866 
    867 var yvex_xyr2 = []ytab{
    868 	{Zvex_rm_v_r, 2, argList{Yxr, Yrl}},
    869 	{Zvex_rm_v_r, 2, argList{Yyr, Yrl}},
    870 }
    871 
        // yvex_vmovdqa: for each of the Yx* and Yy* widths, one row per operand
        // direction.
    872 var yvex_vmovdqa = []ytab{
    873 	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
    874 	{Zvex_r_v_rm, 2, argList{Yxr, Yxm}},
    875 	{Zvex_rm_v_r, 2, argList{Yym, Yyr}},
    876 	{Zvex_r_v_rm, 2, argList{Yyr, Yym}},
    877 }
    878 
    879 var yvex_vmovntdq = []ytab{
    880 	{Zvex_r_v_rm, 2, argList{Yxr, Ym}},
    881 	{Zvex_r_v_rm, 2, argList{Yyr, Ym}},
    882 }
    883 
        // yvex_vpbroadcast: the first operand is Yxm for both destination classes.
    884 var yvex_vpbroadcast = []ytab{
    885 	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
    886 	{Zvex_rm_v_r, 2, argList{Yxm, Yyr}},
    887 }
    888 
    889 var yvex_vpbroadcast_sd = []ytab{
    890 	{Zvex_rm_v_r, 2, argList{Yxm, Yyr}},
    891 }
    892 
    893 var yvex_vpextrw = []ytab{
    894 	{Zvex_i_rm_r, 2, argList{Yi8, Yxr, Yrl}},
    895 	{Zvex_i_r_rm, 2, argList{Yi8, Yxr, Yml}},
    896 }
    897 
        // yvex_m: a single Ym operand; note the zoffset of 3 rather than 2.
    898 var yvex_m = []ytab{
    899 	{Zvex_rm_v_ro, 3, argList{Ym}},
    900 }
    901 
    902 var yvex_xx3 = []ytab{
    903 	{Zvex_rm_v_r, 2, argList{Yxr, Yxr, Yxr}},
    904 }
    905 
    906 var yvex_yi3 = []ytab{
    907 	{Zvex_i_r_rm, 2, argList{Yi8, Yyr, Yxm}},
    908 }
    909 
    910 var yvex_mxy = []ytab{
    911 	{Zvex_rm_v_r, 2, argList{Ym, Yxr}},
    912 	{Zvex_rm_v_r, 2, argList{Ym, Yyr}},
    913 }
    914 
    915 var yvex_yy3 = []ytab{
    916 	{Zvex_rm_v_r, 2, argList{Yym, Yyr, Yyr}},
    917 }
    918 
    919 var yvex_xi3 = []ytab{
    920 	{Zvex_i_rm_r, 2, argList{Yi8, Yxm, Yxr}},
    921 }
    922 
    923 var yvex_vpermpd = []ytab{
    924 	{Zvex_i_rm_r, 2, argList{Yi8, Yym, Yyr}},
    925 }
    926 
        // yvex_vpermilp: for each of the Yx* and Yy* widths, an immediate row
        // followed by a three-operand row.
    927 var yvex_vpermilp = []ytab{
    928 	{Zvex_i_rm_r, 2, argList{Yi8, Yxm, Yxr}},
    929 	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
    930 	{Zvex_i_rm_r, 2, argList{Yi8, Yym, Yyr}},
    931 	{Zvex_rm_v_r, 2, argList{Yym, Yyr, Yyr}},
    932 }
    933 
        // yvex_vcvtps2ph: the Yyr source row precedes the Yxr source row.
    934 var yvex_vcvtps2ph = []ytab{
    935 	{Zvex_i_r_rm, 2, argList{Yi8, Yyr, Yxm}},
    936 	{Zvex_i_r_rm, 2, argList{Yi8, Yxr, Yxm}},
    937 }
    938 
    939 var yvex_vbroadcastf = []ytab{
    940 	{Zvex_rm_v_r, 2, argList{Ym, Yyr}},
    941 }
    942 
    943 var yvex_vmovd = []ytab{
    944 	{Zvex_r_v_rm, 2, argList{Yxr, Yml}},
    945 	{Zvex_rm_v_r, 2, argList{Yml, Yxr}},
    946 }
    947 
        // VEX operand-pattern tables, continued. Rows are
        //	{Ztype, zoffset, argList{operand Ytypes...}}
        // and are tried in order. A zoffset of 2 corresponds to one
        // (VEX descriptor, opcode byte) pair in the owning Optab.op array, as in
        // the AVPXOR example earlier in this file, so rows must stay in the same
        // order as those pairs.
    948 var yvex_x2 = []ytab{
    949 	{Zvex_rm_v_r, 2, argList{Yxm, Yxr}},
    950 }
    951 
    952 var yvex_y2 = []ytab{
    953 	{Zvex_rm_v_r, 2, argList{Yym, Yxr}},
    954 }
    955 
        // yvex: a VEX instruction with no operands.
    956 var yvex = []ytab{
    957 	{Zvex, 2, argList{}},
    958 }
    959 
    960 var yvex_xx2 = []ytab{
    961 	{Zvex_rm_v_r, 2, argList{Yxr, Yxr}},
    962 }
    963 
    964 var yvex_vpalignr = []ytab{
    965 	{Zvex_i_rm_v_r, 2, argList{Yu8, Yxm, Yxr, Yxr}},
    966 	{Zvex_i_rm_v_r, 2, argList{Yu8, Yym, Yyr, Yyr}},
    967 }
    968 
    969 var yvex_rxi4 = []ytab{
    970 	{Zvex_i_rm_v_r, 2, argList{Yu8, Yml, Yxr, Yxr}},
    971 }
    972 
    973 var yvex_xxi4 = []ytab{
    974 	{Zvex_i_rm_v_r, 2, argList{Yu8, Yxm, Yxr, Yxr}},
    975 }
    976 
        // yvex_xy4: four-operand forms whose first operand is a register
        // (Yxr or Yyr), encoded with Zvex_hr_rm_v_r.
    977 var yvex_xy4 = []ytab{
    978 	{Zvex_hr_rm_v_r, 2, argList{Yxr, Yxm, Yxr, Yxr}},
    979 	{Zvex_hr_rm_v_r, 2, argList{Yyr, Yym, Yyr, Yyr}},
    980 }
    981 
    982 var yvex_vpbroadcast_ss = []ytab{
    983 	{Zvex_rm_v_r, 2, argList{Ym, Yxr}},
    984 	{Zvex_rm_v_r, 2, argList{Yxr, Yxr}},
    985 	{Zvex_rm_v_r, 2, argList{Ym, Yyr}},
    986 	{Zvex_rm_v_r, 2, argList{Yxr, Yyr}},
    987 }
    988 
    989 var yvex_vblendvpd = []ytab{
    990 	{Zvex_r_v_rm, 2, argList{Yxr, Yxr, Yml}},
    991 	{Zvex_r_v_rm, 2, argList{Yyr, Yyr, Yml}},
    992 	{Zvex_rm_v_r, 2, argList{Ym, Yxr, Yxr}},
    993 	{Zvex_rm_v_r, 2, argList{Ym, Yyr, Yyr}},
    994 }
    995 
        // NOTE(review): the last two rows of yvex_vmov are identical. If the scan
        // stops at the first matching row the fourth row is never selected and
        // only reserves two Optab.op bytes — confirm against the Optab entries
        // that use this table before removing it.
    996 var yvex_vmov = []ytab{
    997 	{Zvex_r_v_rm, 2, argList{Yxr, Ym}},
    998 	{Zvex_rm_v_r, 2, argList{Ym, Yxr}},
    999 	{Zvex_rm_v_r, 2, argList{Yxr, Yxr, Yxr}},
   1000 	{Zvex_rm_v_r, 2, argList{Yxr, Yxr, Yxr}},
   1001 }
   1002 
        // yvex_vps: for each destination class, a Yxm-source row followed by an
        // immediate row (zoffset 3).
   1003 var yvex_vps = []ytab{
   1004 	{Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}},
   1005 	{Zvex_i_r_v, 3, argList{Yi8, Yxr, Yxr}},
   1006 	{Zvex_rm_v_r, 2, argList{Yxm, Yyr, Yyr}},
   1007 	{Zvex_i_r_v, 3, argList{Yi8, Yyr, Yyr}},
   1008 }
   1009 
   1010 var yvex_r2 = []ytab{
   1011 	{Zvex_rm_r_vo, 3, argList{Yml, Yrl}},
   1012 }
   1013 
   1014 var yvex_vpextr = []ytab{
   1015 	{Zvex_i_r_rm, 2, argList{Yi8, Yxr, Yml}},
   1016 }
   1017 
   1018 var yvex_rx3 = []ytab{
   1019 	{Zvex_rm_v_r, 2, argList{Yml, Yxr, Yxr}},
   1020 }
   1021 
   1022 var yvex_vcvtsd2si = []ytab{
   1023 	{Zvex_rm_v_r, 2, argList{Yxm, Yrl}},
   1024 }
   1025 
   1026 var yvex_vmovhpd = []ytab{
   1027 	{Zvex_r_v_rm, 2, argList{Yxr, Ym}},
   1028 	{Zvex_rm_v_r, 2, argList{Ym, Yxr, Yxr}},
   1029 }
   1030 
        // yvex_vmovq: five rows, so the owning Optab entry needs five
        // (VEX descriptor, opcode byte) pairs in this order.
   1031 var yvex_vmovq = []ytab{
   1032 	{Zvex_r_v_rm, 2, argList{Yxr, Yml}},
   1033 	{Zvex_rm_v_r, 2, argList{Ym, Yxr}},
   1034 	{Zvex_rm_v_r, 2, argList{Yml, Yxr}},
   1035 	{Zvex_rm_v_r, 2, argList{Yxr, Yxr}},
   1036 	{Zvex_r_v_rm, 2, argList{Yxr, Yxm}},
   1037 }
   1038 
        // Gather tables. The middle operand of every row is a VSIB vector-array
        // class (Yxvm = vm32x/vm64x, Yyvm = vm32y/vm64y, per the Ytype list).
        // The three tables differ only in which vector-array class and register
        // class the second row pairs together. Rows are
        // {Ztype, zoffset, argList{...}} and are tried in order.
   1039 var yvpgatherdq = []ytab{
   1040 	{Zvex_v_rm_r, 2, argList{Yxr, Yxvm, Yxr}},
   1041 	{Zvex_v_rm_r, 2, argList{Yyr, Yxvm, Yyr}},
   1042 }
   1043 
   1044 var yvpgatherqq = []ytab{
   1045 	{Zvex_v_rm_r, 2, argList{Yxr, Yxvm, Yxr}},
   1046 	{Zvex_v_rm_r, 2, argList{Yyr, Yyvm, Yyr}},
   1047 }
   1048 
   1049 var yvgatherqps = []ytab{
   1050 	{Zvex_v_rm_r, 2, argList{Yxr, Yxvm, Yxr}},
   1051 	{Zvex_v_rm_r, 2, argList{Yxr, Yyvm, Yxr}},
   1052 }
   1053 
        // ymmxmm0f38: an Ymm/Ymr row followed by an Yxm/Yxr row, both Zlitm_r.
        // NOTE(review): the zoffsets (3 and 5) are larger than in most tables,
        // presumably because Zlitm_r rows keep literal prefix/escape bytes in
        // Optab.op ahead of the opcode — confirm in the Zlitm_r encoder.
   1054 var ymmxmm0f38 = []ytab{
   1055 	{Zlitm_r, 3, argList{Ymm, Ymr}},
   1056 	{Zlitm_r, 5, argList{Yxm, Yxr}},
   1057 }
   1058 
        // yextractps: the immediate must fit in two bits (Yu2).
   1059 var yextractps = []ytab{
   1060 	{Zibr_m, 2, argList{Yu2, Yxr, Yml}},
   1061 }
   1062 
   1063 /*
   1064  * You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   1065  * ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   1066  * to find the entry with the given p.As and then looks through the ytable for
   1067  * that instruction (the second field in the optab struct) for a line whose
   1068  * first two values match the Ytypes of the p.From and p.To operands.  The
   1069  * function oclass computes the specific Ytype of an operand and then the set
   1070  * of more general Ytypes that it satisfies is implied by the ycover table, set
   1071  * up in instinit.  For example, oclass distinguishes the constants 0 and 1
   1072  * from the more general 8-bit constants, but instinit says
   1073  *
   1074  *        ycover[Yi0*Ymax+Ys32] = 1
   1075  *        ycover[Yi1*Ymax+Ys32] = 1
   1076  *        ycover[Yi8*Ymax+Ys32] = 1
   1077  *
   1078  * which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   1079  * if that's what an instruction can handle.
   1080  *
   1081  * In parallel with the scan through the ytable for the appropriate line, there
   1082  * is a z pointer that starts out pointing at the strange magic byte list in
   1083  * the Optab struct.  With each step past a non-matching ytable line, z
   1084  * advances by the offset entry in the line (the number after the Ztype).  When a
   1085  * matching line is found, that z pointer has the extra data to use in laying
   1086  * down the instruction bytes.  The actual bytes laid down are a function of
   1087  * the line's Ztype and the z bytes.
   1088  *
   1089  * For example, let's look at AADDL.  The optab line says:
   1090  *        {AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   1091  *
   1092  * and yaddl says
   1093  *        var yaddl = []ytab{
   1094  *                {Zibo_m, 2, argList{Yi8, Yml}},
   1095  *                {Zil_, 1, argList{Yi32, Yax}},
   1096  *                {Zilo_m, 2, argList{Yi32, Yml}},
   1097  *                {Zr_m, 1, argList{Yrl, Yml}},
   1098  *                {Zm_r, 1, argList{Yml, Yrl}},
   1099  *        }
   1100  *
   1101  * so there are 5 possible types of ADDL instruction that can be laid down, and
   1102  * possible states used to lay them down (Ztype and z pointer, assuming z
   1103  * points at [23]uint8{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   1104  *
   1105  *        Yi8, Yml -> Zibo_m, z (0x83, 00)
   1106  *        Yi32, Yax -> Zil_, z+2 (0x05)
   1107  *        Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   1108  *        Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   1109  *        Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   1110  *
   1111  * The Pconstant in the optab line controls the prefix bytes to emit.  That's
   1112  * relatively straightforward as this program goes.
   1113  *
   1114  * The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   1115  * example, is an opcode byte (z[0]) then an asmando (which is some kind of
   1116  * encoded addressing mode for the Yml arg), and then a single immediate byte.
   1117  * Zilo_m is the same but a long (32-bit) immediate.
   1118  */
   1119 var optab =
   1120 /*	as, ytab, andproto, opcode */
   1121 []Optab{
   1122 	{obj.AXXX, nil, 0, [23]uint8{}},
   1123 	{AAAA, ynone, P32, [23]uint8{0x37}},
   1124 	{AAAD, ynone, P32, [23]uint8{0xd5, 0x0a}},
   1125 	{AAAM, ynone, P32, [23]uint8{0xd4, 0x0a}},
   1126 	{AAAS, ynone, P32, [23]uint8{0x3f}},
   1127 	{AADCB, yxorb, Pb, [23]uint8{0x14, 0x80, 02, 0x10, 0x12}},
   1128 	{AADCL, yaddl, Px, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   1129 	{AADCQ, yaddl, Pw, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   1130 	{AADCW, yaddl, Pe, [23]uint8{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   1131 	{AADCXL, yml_rl, Pq4, [23]uint8{0xf6}},
   1132 	{AADCXQ, yml_rl, Pq4w, [23]uint8{0xf6}},
   1133 	{AADDB, yxorb, Pb, [23]uint8{0x04, 0x80, 00, 0x00, 0x02}},
   1134 	{AADDL, yaddl, Px, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   1135 	{AADDPD, yxm, Pq, [23]uint8{0x58}},
   1136 	{AADDPS, yxm, Pm, [23]uint8{0x58}},
   1137 	{AADDQ, yaddl, Pw, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   1138 	{AADDSD, yxm, Pf2, [23]uint8{0x58}},
   1139 	{AADDSS, yxm, Pf3, [23]uint8{0x58}},
   1140 	{AADDSUBPD, yxm, Pq, [23]uint8{0xd0}},
   1141 	{AADDSUBPS, yxm, Pf2, [23]uint8{0xd0}},
   1142 	{AADDW, yaddl, Pe, [23]uint8{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   1143 	{AADOXL, yml_rl, Pq5, [23]uint8{0xf6}},
   1144 	{AADOXQ, yml_rl, Pq5w, [23]uint8{0xf6}},
   1145 	{AADJSP, nil, 0, [23]uint8{}},
   1146 	{AANDB, yxorb, Pb, [23]uint8{0x24, 0x80, 04, 0x20, 0x22}},
   1147 	{AANDL, yaddl, Px, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   1148 	{AANDNPD, yxm, Pq, [23]uint8{0x55}},
   1149 	{AANDNPS, yxm, Pm, [23]uint8{0x55}},
   1150 	{AANDPD, yxm, Pq, [23]uint8{0x54}},
   1151 	{AANDPS, yxm, Pm, [23]uint8{0x54}},
   1152 	{AANDQ, yaddl, Pw, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   1153 	{AANDW, yaddl, Pe, [23]uint8{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   1154 	{AARPL, yrl_ml, P32, [23]uint8{0x63}},
   1155 	{ABOUNDL, yrl_m, P32, [23]uint8{0x62}},
   1156 	{ABOUNDW, yrl_m, Pe, [23]uint8{0x62}},
   1157 	{ABSFL, yml_rl, Pm, [23]uint8{0xbc}},
   1158 	{ABSFQ, yml_rl, Pw, [23]uint8{0x0f, 0xbc}},
   1159 	{ABSFW, yml_rl, Pq, [23]uint8{0xbc}},
   1160 	{ABSRL, yml_rl, Pm, [23]uint8{0xbd}},
   1161 	{ABSRQ, yml_rl, Pw, [23]uint8{0x0f, 0xbd}},
   1162 	{ABSRW, yml_rl, Pq, [23]uint8{0xbd}},
   1163 	{ABSWAPL, ybswap, Px, [23]uint8{0x0f, 0xc8}},
   1164 	{ABSWAPQ, ybswap, Pw, [23]uint8{0x0f, 0xc8}},
   1165 	{ABTCL, ybtl, Pm, [23]uint8{0xba, 07, 0xbb}},
   1166 	{ABTCQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 07, 0x0f, 0xbb}},
   1167 	{ABTCW, ybtl, Pq, [23]uint8{0xba, 07, 0xbb}},
   1168 	{ABTL, ybtl, Pm, [23]uint8{0xba, 04, 0xa3}},
   1169 	{ABTQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 04, 0x0f, 0xa3}},
   1170 	{ABTRL, ybtl, Pm, [23]uint8{0xba, 06, 0xb3}},
   1171 	{ABTRQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 06, 0x0f, 0xb3}},
   1172 	{ABTRW, ybtl, Pq, [23]uint8{0xba, 06, 0xb3}},
   1173 	{ABTSL, ybtl, Pm, [23]uint8{0xba, 05, 0xab}},
   1174 	{ABTSQ, ybtl, Pw, [23]uint8{0x0f, 0xba, 05, 0x0f, 0xab}},
   1175 	{ABTSW, ybtl, Pq, [23]uint8{0xba, 05, 0xab}},
   1176 	{ABTW, ybtl, Pq, [23]uint8{0xba, 04, 0xa3}},
   1177 	{ABYTE, ybyte, Px, [23]uint8{1}},
   1178 	{obj.ACALL, ycall, Px, [23]uint8{0xff, 02, 0xff, 0x15, 0xe8}},
   1179 	{ACDQ, ynone, Px, [23]uint8{0x99}},
   1180 	{ACLC, ynone, Px, [23]uint8{0xf8}},
   1181 	{ACLD, ynone, Px, [23]uint8{0xfc}},
   1182 	{ACLFLUSH, yclflush, Pm, [23]uint8{0xae, 07}},
   1183 	{ACLI, ynone, Px, [23]uint8{0xfa}},
   1184 	{ACLTS, ynone, Pm, [23]uint8{0x06}},
   1185 	{ACMC, ynone, Px, [23]uint8{0xf5}},
   1186 	{ACMOVLCC, yml_rl, Pm, [23]uint8{0x43}},
   1187 	{ACMOVLCS, yml_rl, Pm, [23]uint8{0x42}},
   1188 	{ACMOVLEQ, yml_rl, Pm, [23]uint8{0x44}},
   1189 	{ACMOVLGE, yml_rl, Pm, [23]uint8{0x4d}},
   1190 	{ACMOVLGT, yml_rl, Pm, [23]uint8{0x4f}},
   1191 	{ACMOVLHI, yml_rl, Pm, [23]uint8{0x47}},
   1192 	{ACMOVLLE, yml_rl, Pm, [23]uint8{0x4e}},
   1193 	{ACMOVLLS, yml_rl, Pm, [23]uint8{0x46}},
   1194 	{ACMOVLLT, yml_rl, Pm, [23]uint8{0x4c}},
   1195 	{ACMOVLMI, yml_rl, Pm, [23]uint8{0x48}},
   1196 	{ACMOVLNE, yml_rl, Pm, [23]uint8{0x45}},
   1197 	{ACMOVLOC, yml_rl, Pm, [23]uint8{0x41}},
   1198 	{ACMOVLOS, yml_rl, Pm, [23]uint8{0x40}},
   1199 	{ACMOVLPC, yml_rl, Pm, [23]uint8{0x4b}},
   1200 	{ACMOVLPL, yml_rl, Pm, [23]uint8{0x49}},
   1201 	{ACMOVLPS, yml_rl, Pm, [23]uint8{0x4a}},
   1202 	{ACMOVQCC, yml_rl, Pw, [23]uint8{0x0f, 0x43}},
   1203 	{ACMOVQCS, yml_rl, Pw, [23]uint8{0x0f, 0x42}},
   1204 	{ACMOVQEQ, yml_rl, Pw, [23]uint8{0x0f, 0x44}},
   1205 	{ACMOVQGE, yml_rl, Pw, [23]uint8{0x0f, 0x4d}},
   1206 	{ACMOVQGT, yml_rl, Pw, [23]uint8{0x0f, 0x4f}},
   1207 	{ACMOVQHI, yml_rl, Pw, [23]uint8{0x0f, 0x47}},
   1208 	{ACMOVQLE, yml_rl, Pw, [23]uint8{0x0f, 0x4e}},
   1209 	{ACMOVQLS, yml_rl, Pw, [23]uint8{0x0f, 0x46}},
   1210 	{ACMOVQLT, yml_rl, Pw, [23]uint8{0x0f, 0x4c}},
   1211 	{ACMOVQMI, yml_rl, Pw, [23]uint8{0x0f, 0x48}},
   1212 	{ACMOVQNE, yml_rl, Pw, [23]uint8{0x0f, 0x45}},
   1213 	{ACMOVQOC, yml_rl, Pw, [23]uint8{0x0f, 0x41}},
   1214 	{ACMOVQOS, yml_rl, Pw, [23]uint8{0x0f, 0x40}},
   1215 	{ACMOVQPC, yml_rl, Pw, [23]uint8{0x0f, 0x4b}},
   1216 	{ACMOVQPL, yml_rl, Pw, [23]uint8{0x0f, 0x49}},
   1217 	{ACMOVQPS, yml_rl, Pw, [23]uint8{0x0f, 0x4a}},
   1218 	{ACMOVWCC, yml_rl, Pq, [23]uint8{0x43}},
   1219 	{ACMOVWCS, yml_rl, Pq, [23]uint8{0x42}},
   1220 	{ACMOVWEQ, yml_rl, Pq, [23]uint8{0x44}},
   1221 	{ACMOVWGE, yml_rl, Pq, [23]uint8{0x4d}},
   1222 	{ACMOVWGT, yml_rl, Pq, [23]uint8{0x4f}},
   1223 	{ACMOVWHI, yml_rl, Pq, [23]uint8{0x47}},
   1224 	{ACMOVWLE, yml_rl, Pq, [23]uint8{0x4e}},
   1225 	{ACMOVWLS, yml_rl, Pq, [23]uint8{0x46}},
   1226 	{ACMOVWLT, yml_rl, Pq, [23]uint8{0x4c}},
   1227 	{ACMOVWMI, yml_rl, Pq, [23]uint8{0x48}},
   1228 	{ACMOVWNE, yml_rl, Pq, [23]uint8{0x45}},
   1229 	{ACMOVWOC, yml_rl, Pq, [23]uint8{0x41}},
   1230 	{ACMOVWOS, yml_rl, Pq, [23]uint8{0x40}},
   1231 	{ACMOVWPC, yml_rl, Pq, [23]uint8{0x4b}},
   1232 	{ACMOVWPL, yml_rl, Pq, [23]uint8{0x49}},
   1233 	{ACMOVWPS, yml_rl, Pq, [23]uint8{0x4a}},
   1234 	{ACMPB, ycmpb, Pb, [23]uint8{0x3c, 0x80, 07, 0x38, 0x3a}},
   1235 	{ACMPL, ycmpl, Px, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   1236 	{ACMPPD, yxcmpi, Px, [23]uint8{Pe, 0xc2}},
   1237 	{ACMPPS, yxcmpi, Pm, [23]uint8{0xc2, 0}},
   1238 	{ACMPQ, ycmpl, Pw, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   1239 	{ACMPSB, ynone, Pb, [23]uint8{0xa6}},
   1240 	{ACMPSD, yxcmpi, Px, [23]uint8{Pf2, 0xc2}},
   1241 	{ACMPSL, ynone, Px, [23]uint8{0xa7}},
   1242 	{ACMPSQ, ynone, Pw, [23]uint8{0xa7}},
   1243 	{ACMPSS, yxcmpi, Px, [23]uint8{Pf3, 0xc2}},
   1244 	{ACMPSW, ynone, Pe, [23]uint8{0xa7}},
   1245 	{ACMPW, ycmpl, Pe, [23]uint8{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
   1246 	{ACOMISD, yxm, Pe, [23]uint8{0x2f}},
   1247 	{ACOMISS, yxm, Pm, [23]uint8{0x2f}},
   1248 	{ACPUID, ynone, Pm, [23]uint8{0xa2}},
   1249 	{ACVTPL2PD, yxcvm2, Px, [23]uint8{Pf3, 0xe6, Pe, 0x2a}},
   1250 	{ACVTPL2PS, yxcvm2, Pm, [23]uint8{0x5b, 0, 0x2a, 0}},
   1251 	{ACVTPD2PL, yxcvm1, Px, [23]uint8{Pf2, 0xe6, Pe, 0x2d}},
   1252 	{ACVTPD2PS, yxm, Pe, [23]uint8{0x5a}},
   1253 	{ACVTPS2PL, yxcvm1, Px, [23]uint8{Pe, 0x5b, Pm, 0x2d}},
   1254 	{ACVTPS2PD, yxm, Pm, [23]uint8{0x5a}},
   1255 	{ACVTSD2SL, yxcvfl, Pf2, [23]uint8{0x2d}},
   1256 	{ACVTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2d}},
   1257 	{ACVTSD2SS, yxm, Pf2, [23]uint8{0x5a}},
   1258 	{ACVTSL2SD, yxcvlf, Pf2, [23]uint8{0x2a}},
   1259 	{ACVTSQ2SD, yxcvqf, Pw, [23]uint8{Pf2, 0x2a}},
   1260 	{ACVTSL2SS, yxcvlf, Pf3, [23]uint8{0x2a}},
   1261 	{ACVTSQ2SS, yxcvqf, Pw, [23]uint8{Pf3, 0x2a}},
   1262 	{ACVTSS2SD, yxm, Pf3, [23]uint8{0x5a}},
   1263 	{ACVTSS2SL, yxcvfl, Pf3, [23]uint8{0x2d}},
   1264 	{ACVTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2d}},
   1265 	{ACVTTPD2PL, yxcvm1, Px, [23]uint8{Pe, 0xe6, Pe, 0x2c}},
   1266 	{ACVTTPS2PL, yxcvm1, Px, [23]uint8{Pf3, 0x5b, Pm, 0x2c}},
   1267 	{ACVTTSD2SL, yxcvfl, Pf2, [23]uint8{0x2c}},
   1268 	{ACVTTSD2SQ, yxcvfq, Pw, [23]uint8{Pf2, 0x2c}},
   1269 	{ACVTTSS2SL, yxcvfl, Pf3, [23]uint8{0x2c}},
   1270 	{ACVTTSS2SQ, yxcvfq, Pw, [23]uint8{Pf3, 0x2c}},
   1271 	{ACWD, ynone, Pe, [23]uint8{0x99}},
   1272 	{ACQO, ynone, Pw, [23]uint8{0x99}},
   1273 	{ADAA, ynone, P32, [23]uint8{0x27}},
   1274 	{ADAS, ynone, P32, [23]uint8{0x2f}},
   1275 	{ADECB, yscond, Pb, [23]uint8{0xfe, 01}},
   1276 	{ADECL, yincl, Px1, [23]uint8{0x48, 0xff, 01}},
   1277 	{ADECQ, yincq, Pw, [23]uint8{0xff, 01}},
   1278 	{ADECW, yincq, Pe, [23]uint8{0xff, 01}},
   1279 	{ADIVB, ydivb, Pb, [23]uint8{0xf6, 06}},
   1280 	{ADIVL, ydivl, Px, [23]uint8{0xf7, 06}},
   1281 	{ADIVPD, yxm, Pe, [23]uint8{0x5e}},
   1282 	{ADIVPS, yxm, Pm, [23]uint8{0x5e}},
   1283 	{ADIVQ, ydivl, Pw, [23]uint8{0xf7, 06}},
   1284 	{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
   1285 	{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
   1286 	{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
   1287 	{ADPPD, yxshuf, Pq, [23]uint8{0x3a, 0x41, 0}},
   1288 	{ADPPS, yxshuf, Pq, [23]uint8{0x3a, 0x40, 0}},
   1289 	{AEMMS, ynone, Pm, [23]uint8{0x77}},
   1290 	{AEXTRACTPS, yextractps, Pq, [23]uint8{0x3a, 0x17, 0}},
   1291 	{AENTER, nil, 0, [23]uint8{}}, /* botch */
   1292 	{AFXRSTOR, ysvrs_mo, Pm, [23]uint8{0xae, 01, 0xae, 01}},
   1293 	{AFXSAVE, ysvrs_om, Pm, [23]uint8{0xae, 00, 0xae, 00}},
   1294 	{AFXRSTOR64, ysvrs_mo, Pw, [23]uint8{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
   1295 	{AFXSAVE64, ysvrs_om, Pw, [23]uint8{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
   1296 	{AHLT, ynone, Px, [23]uint8{0xf4}},
   1297 	{AIDIVB, ydivb, Pb, [23]uint8{0xf6, 07}},
   1298 	{AIDIVL, ydivl, Px, [23]uint8{0xf7, 07}},
   1299 	{AIDIVQ, ydivl, Pw, [23]uint8{0xf7, 07}},
   1300 	{AIDIVW, ydivl, Pe, [23]uint8{0xf7, 07}},
   1301 	{AIMULB, ydivb, Pb, [23]uint8{0xf6, 05}},
   1302 	{AIMULL, yimul, Px, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   1303 	{AIMULQ, yimul, Pw, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   1304 	{AIMULW, yimul, Pe, [23]uint8{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
   1305 	{AIMUL3Q, yimul3, Pw, [23]uint8{0x6b, 00}},
   1306 	{AINB, yin, Pb, [23]uint8{0xe4, 0xec}},
   1307 	{AINCB, yscond, Pb, [23]uint8{0xfe, 00}},
   1308 	{AINCL, yincl, Px1, [23]uint8{0x40, 0xff, 00}},
   1309 	{AINCQ, yincq, Pw, [23]uint8{0xff, 00}},
   1310 	{AINCW, yincq, Pe, [23]uint8{0xff, 00}},
   1311 	{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
   1312 	{AINSB, ynone, Pb, [23]uint8{0x6c}},
   1313 	{AINSL, ynone, Px, [23]uint8{0x6d}},
   1314 	{AINSERTPS, yxshuf, Pq, [23]uint8{0x3a, 0x21, 0}},
   1315 	{AINSW, ynone, Pe, [23]uint8{0x6d}},
   1316 	{AINT, yint, Px, [23]uint8{0xcd}},
   1317 	{AINTO, ynone, P32, [23]uint8{0xce}},
   1318 	{AINW, yin, Pe, [23]uint8{0xe5, 0xed}},
   1319 	{AIRETL, ynone, Px, [23]uint8{0xcf}},
   1320 	{AIRETQ, ynone, Pw, [23]uint8{0xcf}},
   1321 	{AIRETW, ynone, Pe, [23]uint8{0xcf}},
   1322 	{AJCC, yjcond, Px, [23]uint8{0x73, 0x83, 00}},
   1323 	{AJCS, yjcond, Px, [23]uint8{0x72, 0x82}},
   1324 	{AJCXZL, yloop, Px, [23]uint8{0xe3}},
   1325 	{AJCXZW, yloop, Px, [23]uint8{0xe3}},
   1326 	{AJCXZQ, yloop, Px, [23]uint8{0xe3}},
   1327 	{AJEQ, yjcond, Px, [23]uint8{0x74, 0x84}},
   1328 	{AJGE, yjcond, Px, [23]uint8{0x7d, 0x8d}},
   1329 	{AJGT, yjcond, Px, [23]uint8{0x7f, 0x8f}},
   1330 	{AJHI, yjcond, Px, [23]uint8{0x77, 0x87}},
   1331 	{AJLE, yjcond, Px, [23]uint8{0x7e, 0x8e}},
   1332 	{AJLS, yjcond, Px, [23]uint8{0x76, 0x86}},
   1333 	{AJLT, yjcond, Px, [23]uint8{0x7c, 0x8c}},
   1334 	{AJMI, yjcond, Px, [23]uint8{0x78, 0x88}},
   1335 	{obj.AJMP, yjmp, Px, [23]uint8{0xff, 04, 0xeb, 0xe9}},
   1336 	{AJNE, yjcond, Px, [23]uint8{0x75, 0x85}},
   1337 	{AJOC, yjcond, Px, [23]uint8{0x71, 0x81, 00}},
   1338 	{AJOS, yjcond, Px, [23]uint8{0x70, 0x80, 00}},
   1339 	{AJPC, yjcond, Px, [23]uint8{0x7b, 0x8b}},
   1340 	{AJPL, yjcond, Px, [23]uint8{0x79, 0x89}},
   1341 	{AJPS, yjcond, Px, [23]uint8{0x7a, 0x8a}},
   1342 	{AHADDPD, yxm, Pq, [23]uint8{0x7c}},
   1343 	{AHADDPS, yxm, Pf2, [23]uint8{0x7c}},
   1344 	{AHSUBPD, yxm, Pq, [23]uint8{0x7d}},
   1345 	{AHSUBPS, yxm, Pf2, [23]uint8{0x7d}},
   1346 	{ALAHF, ynone, Px, [23]uint8{0x9f}},
   1347 	{ALARL, yml_rl, Pm, [23]uint8{0x02}},
   1348 	{ALARW, yml_rl, Pq, [23]uint8{0x02}},
   1349 	{ALDDQU, ylddqu, Pf2, [23]uint8{0xf0}},
   1350 	{ALDMXCSR, ysvrs_mo, Pm, [23]uint8{0xae, 02, 0xae, 02}},
   1351 	{ALEAL, ym_rl, Px, [23]uint8{0x8d}},
   1352 	{ALEAQ, ym_rl, Pw, [23]uint8{0x8d}},
   1353 	{ALEAVEL, ynone, P32, [23]uint8{0xc9}},
   1354 	{ALEAVEQ, ynone, Py, [23]uint8{0xc9}},
   1355 	{ALEAVEW, ynone, Pe, [23]uint8{0xc9}},
   1356 	{ALEAW, ym_rl, Pe, [23]uint8{0x8d}},
   1357 	{ALOCK, ynone, Px, [23]uint8{0xf0}},
   1358 	{ALODSB, ynone, Pb, [23]uint8{0xac}},
   1359 	{ALODSL, ynone, Px, [23]uint8{0xad}},
   1360 	{ALODSQ, ynone, Pw, [23]uint8{0xad}},
   1361 	{ALODSW, ynone, Pe, [23]uint8{0xad}},
   1362 	{ALONG, ybyte, Px, [23]uint8{4}},
   1363 	{ALOOP, yloop, Px, [23]uint8{0xe2}},
   1364 	{ALOOPEQ, yloop, Px, [23]uint8{0xe1}},
   1365 	{ALOOPNE, yloop, Px, [23]uint8{0xe0}},
   1366 	{ALSLL, yml_rl, Pm, [23]uint8{0x03}},
   1367 	{ALSLW, yml_rl, Pq, [23]uint8{0x03}},
   1368 	{AMASKMOVOU, yxr, Pe, [23]uint8{0xf7}},
   1369 	{AMASKMOVQ, ymr, Pm, [23]uint8{0xf7}},
   1370 	{AMAXPD, yxm, Pe, [23]uint8{0x5f}},
   1371 	{AMAXPS, yxm, Pm, [23]uint8{0x5f}},
   1372 	{AMAXSD, yxm, Pf2, [23]uint8{0x5f}},
   1373 	{AMAXSS, yxm, Pf3, [23]uint8{0x5f}},
   1374 	{AMINPD, yxm, Pe, [23]uint8{0x5d}},
   1375 	{AMINPS, yxm, Pm, [23]uint8{0x5d}},
   1376 	{AMINSD, yxm, Pf2, [23]uint8{0x5d}},
   1377 	{AMINSS, yxm, Pf3, [23]uint8{0x5d}},
   1378 	{AMOVAPD, yxmov, Pe, [23]uint8{0x28, 0x29}},
   1379 	{AMOVAPS, yxmov, Pm, [23]uint8{0x28, 0x29}},
   1380 	{AMOVB, ymovb, Pb, [23]uint8{0x88, 0x8a, 0xb0, 0xc6, 00}},
   1381 	{AMOVBLSX, ymb_rl, Pm, [23]uint8{0xbe}},
   1382 	{AMOVBLZX, ymb_rl, Pm, [23]uint8{0xb6}},
   1383 	{AMOVBQSX, ymb_rl, Pw, [23]uint8{0x0f, 0xbe}},
   1384 	{AMOVBQZX, ymb_rl, Pm, [23]uint8{0xb6}},
   1385 	{AMOVBWSX, ymb_rl, Pq, [23]uint8{0xbe}},
   1386 	{AMOVBWZX, ymb_rl, Pq, [23]uint8{0xb6}},
   1387 	{AMOVO, yxmov, Pe, [23]uint8{0x6f, 0x7f}},
   1388 	{AMOVOU, yxmov, Pf3, [23]uint8{0x6f, 0x7f}},
   1389 	{AMOVHLPS, yxr, Pm, [23]uint8{0x12}},
   1390 	{AMOVHPD, yxmov, Pe, [23]uint8{0x16, 0x17}},
   1391 	{AMOVHPS, yxmov, Pm, [23]uint8{0x16, 0x17}},
   1392 	{AMOVL, ymovl, Px, [23]uint8{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
   1393 	{AMOVLHPS, yxr, Pm, [23]uint8{0x16}},
   1394 	{AMOVLPD, yxmov, Pe, [23]uint8{0x12, 0x13}},
   1395 	{AMOVLPS, yxmov, Pm, [23]uint8{0x12, 0x13}},
   1396 	{AMOVLQSX, yml_rl, Pw, [23]uint8{0x63}},
   1397 	{AMOVLQZX, yml_rl, Px, [23]uint8{0x8b}},
   1398 	{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
   1399 	{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
   1400 	{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
   1401 	{AMOVNTDQA, ylddqu, Pq4, [23]uint8{0x2a}},
   1402 	{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
   1403 	{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
   1404 	{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
   1405 	{AMOVQ, ymovq, Pw8, [23]uint8{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
   1406 	{AMOVQOZX, ymrxr, Pf3, [23]uint8{0xd6, 0x7e}},
   1407 	{AMOVSB, ynone, Pb, [23]uint8{0xa4}},
   1408 	{AMOVSD, yxmov, Pf2, [23]uint8{0x10, 0x11}},
   1409 	{AMOVSL, ynone, Px, [23]uint8{0xa5}},
   1410 	{AMOVSQ, ynone, Pw, [23]uint8{0xa5}},
   1411 	{AMOVSS, yxmov, Pf3, [23]uint8{0x10, 0x11}},
   1412 	{AMOVSW, ynone, Pe, [23]uint8{0xa5}},
   1413 	{AMOVUPD, yxmov, Pe, [23]uint8{0x10, 0x11}},
   1414 	{AMOVUPS, yxmov, Pm, [23]uint8{0x10, 0x11}},
   1415 	{AMOVW, ymovw, Pe, [23]uint8{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
   1416 	{AMOVWLSX, yml_rl, Pm, [23]uint8{0xbf}},
   1417 	{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
   1418 	{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
   1419 	{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
   1420 	{AMPSADBW, yxshuf, Pq, [23]uint8{0x3a, 0x42, 0}},
   1421 	{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
   1422 	{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
   1423 	{AMULPD, yxm, Pe, [23]uint8{0x59}},
   1424 	{AMULPS, yxm, Ym, [23]uint8{0x59}},
   1425 	{AMULQ, ydivl, Pw, [23]uint8{0xf7, 04}},
   1426 	{AMULSD, yxm, Pf2, [23]uint8{0x59}},
   1427 	{AMULSS, yxm, Pf3, [23]uint8{0x59}},
   1428 	{AMULW, ydivl, Pe, [23]uint8{0xf7, 04}},
   1429 	{ANEGB, yscond, Pb, [23]uint8{0xf6, 03}},
   1430 	{ANEGL, yscond, Px, [23]uint8{0xf7, 03}},
   1431 	{ANEGQ, yscond, Pw, [23]uint8{0xf7, 03}},
   1432 	{ANEGW, yscond, Pe, [23]uint8{0xf7, 03}},
   1433 	{obj.ANOP, ynop, Px, [23]uint8{0, 0}},
   1434 	{ANOTB, yscond, Pb, [23]uint8{0xf6, 02}},
   1435 	{ANOTL, yscond, Px, [23]uint8{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
   1436 	{ANOTQ, yscond, Pw, [23]uint8{0xf7, 02}},
   1437 	{ANOTW, yscond, Pe, [23]uint8{0xf7, 02}},
   1438 	{AORB, yxorb, Pb, [23]uint8{0x0c, 0x80, 01, 0x08, 0x0a}},
   1439 	{AORL, yaddl, Px, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
   1440 	{AORPD, yxm, Pq, [23]uint8{0x56}},
   1441 	{AORPS, yxm, Pm, [23]uint8{0x56}},
   1442 	{AORQ, yaddl, Pw, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
   1443 	{AORW, yaddl, Pe, [23]uint8{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
   1444 	{AOUTB, yin, Pb, [23]uint8{0xe6, 0xee}},
   1445 	{AOUTL, yin, Px, [23]uint8{0xe7, 0xef}},
   1446 	{AOUTSB, ynone, Pb, [23]uint8{0x6e}},
   1447 	{AOUTSL, ynone, Px, [23]uint8{0x6f}},
   1448 	{AOUTSW, ynone, Pe, [23]uint8{0x6f}},
   1449 	{AOUTW, yin, Pe, [23]uint8{0xe7, 0xef}},
   1450 	{APABSB, yxm_q4, Pq4, [23]uint8{0x1c}},
   1451 	{APABSD, yxm_q4, Pq4, [23]uint8{0x1e}},
   1452 	{APABSW, yxm_q4, Pq4, [23]uint8{0x1d}},
   1453 	{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
   1454 	{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
   1455 	{APACKUSDW, yxm_q4, Pq4, [23]uint8{0x2b}},
   1456 	{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
   1457 	{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
   1458 	{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
   1459 	{APADDQ, yxm, Pe, [23]uint8{0xd4}},
   1460 	{APADDSB, ymm, Py1, [23]uint8{0xec, Pe, 0xec}},
   1461 	{APADDSW, ymm, Py1, [23]uint8{0xed, Pe, 0xed}},
   1462 	{APADDUSB, ymm, Py1, [23]uint8{0xdc, Pe, 0xdc}},
   1463 	{APADDUSW, ymm, Py1, [23]uint8{0xdd, Pe, 0xdd}},
   1464 	{APADDW, ymm, Py1, [23]uint8{0xfd, Pe, 0xfd}},
   1465 	{APALIGNR, ypalignr, Pq, [23]uint8{0x3a, 0x0f}},
   1466 	{APAND, ymm, Py1, [23]uint8{0xdb, Pe, 0xdb}},
   1467 	{APANDN, ymm, Py1, [23]uint8{0xdf, Pe, 0xdf}},
   1468 	{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
   1469 	{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
   1470 	{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
   1471 	{APBLENDW, yxshuf, Pq, [23]uint8{0x3a, 0x0e, 0}},
   1472 	{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
   1473 	{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
   1474 	{APCMPEQQ, yxm_q4, Pq4, [23]uint8{0x29}},
   1475 	{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
   1476 	{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
   1477 	{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
   1478 	{APCMPGTQ, yxm_q4, Pq4, [23]uint8{0x37}},
   1479 	{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
   1480 	{APCMPISTRI, yxshuf, Pq, [23]uint8{0x3a, 0x63, 0}},
   1481 	{APCMPISTRM, yxshuf, Pq, [23]uint8{0x3a, 0x62, 0}},
   1482 	{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
   1483 	{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
   1484 	{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
   1485 	{APEXTRQ, yextr, Pq3, [23]uint8{0x3a, 0x16, 00}},
   1486 	{APHADDD, ymmxmm0f38, Px, [23]uint8{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
   1487 	{APHADDSW, yxm_q4, Pq4, [23]uint8{0x03}},
   1488 	{APHADDW, yxm_q4, Pq4, [23]uint8{0x01}},
   1489 	{APHMINPOSUW, yxm_q4, Pq4, [23]uint8{0x41}},
   1490 	{APHSUBD, yxm_q4, Pq4, [23]uint8{0x06}},
   1491 	{APHSUBSW, yxm_q4, Pq4, [23]uint8{0x07}},
   1492 	{APHSUBW, yxm_q4, Pq4, [23]uint8{0x05}},
   1493 	{APINSRW, yinsrw, Pq, [23]uint8{0xc4, 00}},
   1494 	{APINSRB, yinsr, Pq, [23]uint8{0x3a, 0x20, 00}},
   1495 	{APINSRD, yinsr, Pq, [23]uint8{0x3a, 0x22, 00}},
   1496 	{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
   1497 	{APMADDUBSW, yxm_q4, Pq4, [23]uint8{0x04}},
   1498 	{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
   1499 	{APMAXSB, yxm_q4, Pq4, [23]uint8{0x3c}},
   1500 	{APMAXSD, yxm_q4, Pq4, [23]uint8{0x3d}},
   1501 	{APMAXSW, yxm, Pe, [23]uint8{0xee}},
   1502 	{APMAXUB, yxm, Pe, [23]uint8{0xde}},
   1503 	{APMAXUD, yxm_q4, Pq4, [23]uint8{0x3f}},
   1504 	{APMAXUW, yxm_q4, Pq4, [23]uint8{0x3e}},
   1505 	{APMINSB, yxm_q4, Pq4, [23]uint8{0x38}},
   1506 	{APMINSD, yxm_q4, Pq4, [23]uint8{0x39}},
   1507 	{APMINSW, yxm, Pe, [23]uint8{0xea}},
   1508 	{APMINUB, yxm, Pe, [23]uint8{0xda}},
   1509 	{APMINUD, yxm_q4, Pq4, [23]uint8{0x3b}},
   1510 	{APMINUW, yxm_q4, Pq4, [23]uint8{0x3a}},
   1511 	{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
   1512 	{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
   1513 	{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
   1514 	{APMOVSXBW, yxm_q4, Pq4, [23]uint8{0x20}},
   1515 	{APMOVSXDQ, yxm_q4, Pq4, [23]uint8{0x25}},
   1516 	{APMOVSXWD, yxm_q4, Pq4, [23]uint8{0x23}},
   1517 	{APMOVSXWQ, yxm_q4, Pq4, [23]uint8{0x24}},
   1518 	{APMOVZXBD, yxm_q4, Pq4, [23]uint8{0x31}},
   1519 	{APMOVZXBQ, yxm_q4, Pq4, [23]uint8{0x32}},
   1520 	{APMOVZXBW, yxm_q4, Pq4, [23]uint8{0x30}},
   1521 	{APMOVZXDQ, yxm_q4, Pq4, [23]uint8{0x35}},
   1522 	{APMOVZXWD, yxm_q4, Pq4, [23]uint8{0x33}},
   1523 	{APMOVZXWQ, yxm_q4, Pq4, [23]uint8{0x34}},
   1524 	{APMULDQ, yxm_q4, Pq4, [23]uint8{0x28}},
   1525 	{APMULHRSW, yxm_q4, Pq4, [23]uint8{0x0b}},
   1526 	{APMULHUW, ymm, Py1, [23]uint8{0xe4, Pe, 0xe4}},
   1527 	{APMULHW, ymm, Py1, [23]uint8{0xe5, Pe, 0xe5}},
   1528 	{APMULLD, yxm_q4, Pq4, [23]uint8{0x40}},
   1529 	{APMULLW, ymm, Py1, [23]uint8{0xd5, Pe, 0xd5}},
   1530 	{APMULULQ, ymm, Py1, [23]uint8{0xf4, Pe, 0xf4}},
   1531 	{APOPAL, ynone, P32, [23]uint8{0x61}},
   1532 	{APOPAW, ynone, Pe, [23]uint8{0x61}},
   1533 	{APOPCNTW, yml_rl, Pef3, [23]uint8{0xb8}},
   1534 	{APOPCNTL, yml_rl, Pf3, [23]uint8{0xb8}},
   1535 	{APOPCNTQ, yml_rl, Pfw, [23]uint8{0xb8}},
   1536 	{APOPFL, ynone, P32, [23]uint8{0x9d}},
   1537 	{APOPFQ, ynone, Py, [23]uint8{0x9d}},
   1538 	{APOPFW, ynone, Pe, [23]uint8{0x9d}},
   1539 	{APOPL, ypopl, P32, [23]uint8{0x58, 0x8f, 00}},
   1540 	{APOPQ, ypopl, Py, [23]uint8{0x58, 0x8f, 00}},
   1541 	{APOPW, ypopl, Pe, [23]uint8{0x58, 0x8f, 00}},
   1542 	{APOR, ymm, Py1, [23]uint8{0xeb, Pe, 0xeb}},
   1543 	{APSADBW, yxm, Pq, [23]uint8{0xf6}},
   1544 	{APSHUFHW, yxshuf, Pf3, [23]uint8{0x70, 00}},
   1545 	{APSHUFL, yxshuf, Pq, [23]uint8{0x70, 00}},
   1546 	{APSHUFLW, yxshuf, Pf2, [23]uint8{0x70, 00}},
   1547 	{APSHUFW, ymshuf, Pm, [23]uint8{0x70, 00}},
   1548 	{APSHUFB, ymshufb, Pq, [23]uint8{0x38, 0x00}},
   1549 	{APSIGNB, yxm_q4, Pq4, [23]uint8{0x08}},
   1550 	{APSIGND, yxm_q4, Pq4, [23]uint8{0x0a}},
   1551 	{APSIGNW, yxm_q4, Pq4, [23]uint8{0x09}},
   1552 	{APSLLO, ypsdq, Pq, [23]uint8{0x73, 07}},
   1553 	{APSLLL, yps, Py3, [23]uint8{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
   1554 	{APSLLQ, yps, Py3, [23]uint8{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
   1555 	{APSLLW, yps, Py3, [23]uint8{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
   1556 	{APSRAL, yps, Py3, [23]uint8{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
   1557 	{APSRAW, yps, Py3, [23]uint8{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
   1558 	{APSRLO, ypsdq, Pq, [23]uint8{0x73, 03}},
   1559 	{APSRLL, yps, Py3, [23]uint8{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
   1560 	{APSRLQ, yps, Py3, [23]uint8{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
   1561 	{APSRLW, yps, Py3, [23]uint8{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
   1562 	{APSUBB, yxm, Pe, [23]uint8{0xf8}},
   1563 	{APSUBL, yxm, Pe, [23]uint8{0xfa}},
   1564 	{APSUBQ, yxm, Pe, [23]uint8{0xfb}},
   1565 	{APSUBSB, yxm, Pe, [23]uint8{0xe8}},
   1566 	{APSUBSW, yxm, Pe, [23]uint8{0xe9}},
   1567 	{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
   1568 	{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
   1569 	{APSUBW, yxm, Pe, [23]uint8{0xf9}},
   1570 	{APTEST, yxm_q4, Pq4, [23]uint8{0x17}},
   1571 	{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
   1572 	{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
   1573 	{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
   1574 	{APUNPCKHWL, ymm, Py1, [23]uint8{0x69, Pe, 0x69}},
   1575 	{APUNPCKLBW, ymm, Py1, [23]uint8{0x60, Pe, 0x60}},
   1576 	{APUNPCKLLQ, ymm, Py1, [23]uint8{0x62, Pe, 0x62}},
   1577 	{APUNPCKLQDQ, yxm, Pe, [23]uint8{0x6c}},
   1578 	{APUNPCKLWL, ymm, Py1, [23]uint8{0x61, Pe, 0x61}},
   1579 	{APUSHAL, ynone, P32, [23]uint8{0x60}},
   1580 	{APUSHAW, ynone, Pe, [23]uint8{0x60}},
   1581 	{APUSHFL, ynone, P32, [23]uint8{0x9c}},
   1582 	{APUSHFQ, ynone, Py, [23]uint8{0x9c}},
   1583 	{APUSHFW, ynone, Pe, [23]uint8{0x9c}},
   1584 	{APUSHL, ypushl, P32, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
   1585 	{APUSHQ, ypushl, Py, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
   1586 	{APUSHW, ypushl, Pe, [23]uint8{0x50, 0xff, 06, 0x6a, 0x68}},
   1587 	{APXOR, ymm, Py1, [23]uint8{0xef, Pe, 0xef}},
   1588 	{AQUAD, ybyte, Px, [23]uint8{8}},
   1589 	{ARCLB, yshb, Pb, [23]uint8{0xd0, 02, 0xc0, 02, 0xd2, 02}},
   1590 	{ARCLL, yshl, Px, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
   1591 	{ARCLQ, yshl, Pw, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
   1592 	{ARCLW, yshl, Pe, [23]uint8{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
   1593 	{ARCPPS, yxm, Pm, [23]uint8{0x53}},
   1594 	{ARCPSS, yxm, Pf3, [23]uint8{0x53}},
   1595 	{ARCRB, yshb, Pb, [23]uint8{0xd0, 03, 0xc0, 03, 0xd2, 03}},
   1596 	{ARCRL, yshl, Px, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
   1597 	{ARCRQ, yshl, Pw, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
   1598 	{ARCRW, yshl, Pe, [23]uint8{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
   1599 	{AREP, ynone, Px, [23]uint8{0xf3}},
   1600 	{AREPN, ynone, Px, [23]uint8{0xf2}},
   1601 	{obj.ARET, ynone, Px, [23]uint8{0xc3}},
   1602 	{ARETFW, yret, Pe, [23]uint8{0xcb, 0xca}},
   1603 	{ARETFL, yret, Px, [23]uint8{0xcb, 0xca}},
   1604 	{ARETFQ, yret, Pw, [23]uint8{0xcb, 0xca}},
   1605 	{AROLB, yshb, Pb, [23]uint8{0xd0, 00, 0xc0, 00, 0xd2, 00}},
   1606 	{AROLL, yshl, Px, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
   1607 	{AROLQ, yshl, Pw, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
   1608 	{AROLW, yshl, Pe, [23]uint8{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
   1609 	{ARORB, yshb, Pb, [23]uint8{0xd0, 01, 0xc0, 01, 0xd2, 01}},
   1610 	{ARORL, yshl, Px, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
   1611 	{ARORQ, yshl, Pw, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
   1612 	{ARORW, yshl, Pe, [23]uint8{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
   1613 	{ARSQRTPS, yxm, Pm, [23]uint8{0x52}},
   1614 	{ARSQRTSS, yxm, Pf3, [23]uint8{0x52}},
   1615 	{ASAHF, ynone, Px1, [23]uint8{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, /* XCHGB AH,AL; PUSH AX; POPFL */
   1616 	{ASALB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
   1617 	{ASALL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1618 	{ASALQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1619 	{ASALW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1620 	{ASARB, yshb, Pb, [23]uint8{0xd0, 07, 0xc0, 07, 0xd2, 07}},
   1621 	{ASARL, yshl, Px, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
   1622 	{ASARQ, yshl, Pw, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
   1623 	{ASARW, yshl, Pe, [23]uint8{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
   1624 	{ASBBB, yxorb, Pb, [23]uint8{0x1c, 0x80, 03, 0x18, 0x1a}},
   1625 	{ASBBL, yaddl, Px, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
   1626 	{ASBBQ, yaddl, Pw, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
   1627 	{ASBBW, yaddl, Pe, [23]uint8{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
   1628 	{ASCASB, ynone, Pb, [23]uint8{0xae}},
   1629 	{ASCASL, ynone, Px, [23]uint8{0xaf}},
   1630 	{ASCASQ, ynone, Pw, [23]uint8{0xaf}},
   1631 	{ASCASW, ynone, Pe, [23]uint8{0xaf}},
   1632 	{ASETCC, yscond, Pb, [23]uint8{0x0f, 0x93, 00}},
   1633 	{ASETCS, yscond, Pb, [23]uint8{0x0f, 0x92, 00}},
   1634 	{ASETEQ, yscond, Pb, [23]uint8{0x0f, 0x94, 00}},
   1635 	{ASETGE, yscond, Pb, [23]uint8{0x0f, 0x9d, 00}},
   1636 	{ASETGT, yscond, Pb, [23]uint8{0x0f, 0x9f, 00}},
   1637 	{ASETHI, yscond, Pb, [23]uint8{0x0f, 0x97, 00}},
   1638 	{ASETLE, yscond, Pb, [23]uint8{0x0f, 0x9e, 00}},
   1639 	{ASETLS, yscond, Pb, [23]uint8{0x0f, 0x96, 00}},
   1640 	{ASETLT, yscond, Pb, [23]uint8{0x0f, 0x9c, 00}},
   1641 	{ASETMI, yscond, Pb, [23]uint8{0x0f, 0x98, 00}},
   1642 	{ASETNE, yscond, Pb, [23]uint8{0x0f, 0x95, 00}},
   1643 	{ASETOC, yscond, Pb, [23]uint8{0x0f, 0x91, 00}},
   1644 	{ASETOS, yscond, Pb, [23]uint8{0x0f, 0x90, 00}},
   1645 	{ASETPC, yscond, Pb, [23]uint8{0x0f, 0x9b, 00}},
   1646 	{ASETPL, yscond, Pb, [23]uint8{0x0f, 0x99, 00}},
   1647 	{ASETPS, yscond, Pb, [23]uint8{0x0f, 0x9a, 00}},
   1648 	{ASHLB, yshb, Pb, [23]uint8{0xd0, 04, 0xc0, 04, 0xd2, 04}},
   1649 	{ASHLL, yshl, Px, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1650 	{ASHLQ, yshl, Pw, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1651 	{ASHLW, yshl, Pe, [23]uint8{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
   1652 	{ASHRB, yshb, Pb, [23]uint8{0xd0, 05, 0xc0, 05, 0xd2, 05}},
   1653 	{ASHRL, yshl, Px, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
   1654 	{ASHRQ, yshl, Pw, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
   1655 	{ASHRW, yshl, Pe, [23]uint8{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
   1656 	{ASHUFPD, yxshuf, Pq, [23]uint8{0xc6, 00}},
   1657 	{ASHUFPS, yxshuf, Pm, [23]uint8{0xc6, 00}},
   1658 	{ASQRTPD, yxm, Pe, [23]uint8{0x51}},
   1659 	{ASQRTPS, yxm, Pm, [23]uint8{0x51}},
   1660 	{ASQRTSD, yxm, Pf2, [23]uint8{0x51}},
   1661 	{ASQRTSS, yxm, Pf3, [23]uint8{0x51}},
   1662 	{ASTC, ynone, Px, [23]uint8{0xf9}},
   1663 	{ASTD, ynone, Px, [23]uint8{0xfd}},
   1664 	{ASTI, ynone, Px, [23]uint8{0xfb}},
   1665 	{ASTMXCSR, ysvrs_om, Pm, [23]uint8{0xae, 03, 0xae, 03}},
   1666 	{ASTOSB, ynone, Pb, [23]uint8{0xaa}},
   1667 	{ASTOSL, ynone, Px, [23]uint8{0xab}},
   1668 	{ASTOSQ, ynone, Pw, [23]uint8{0xab}},
   1669 	{ASTOSW, ynone, Pe, [23]uint8{0xab}},
   1670 	{ASUBB, yxorb, Pb, [23]uint8{0x2c, 0x80, 05, 0x28, 0x2a}},
   1671 	{ASUBL, yaddl, Px, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
   1672 	{ASUBPD, yxm, Pe, [23]uint8{0x5c}},
   1673 	{ASUBPS, yxm, Pm, [23]uint8{0x5c}},
   1674 	{ASUBQ, yaddl, Pw, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
   1675 	{ASUBSD, yxm, Pf2, [23]uint8{0x5c}},
   1676 	{ASUBSS, yxm, Pf3, [23]uint8{0x5c}},
   1677 	{ASUBW, yaddl, Pe, [23]uint8{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
   1678 	{ASWAPGS, ynone, Pm, [23]uint8{0x01, 0xf8}},
   1679 	{ASYSCALL, ynone, Px, [23]uint8{0x0f, 0x05}}, /* fast syscall */
   1680 	{ATESTB, yxorb, Pb, [23]uint8{0xa8, 0xf6, 00, 0x84, 0x84}},
   1681 	{ATESTL, ytestl, Px, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
   1682 	{ATESTQ, ytestl, Pw, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
   1683 	{ATESTW, ytestl, Pe, [23]uint8{0xa9, 0xf7, 00, 0x85, 0x85}},
   1684 	{obj.ATEXT, ytext, Px, [23]uint8{}},
   1685 	{AUCOMISD, yxm, Pe, [23]uint8{0x2e}},
   1686 	{AUCOMISS, yxm, Pm, [23]uint8{0x2e}},
   1687 	{AUNPCKHPD, yxm, Pe, [23]uint8{0x15}},
   1688 	{AUNPCKHPS, yxm, Pm, [23]uint8{0x15}},
   1689 	{AUNPCKLPD, yxm, Pe, [23]uint8{0x14}},
   1690 	{AUNPCKLPS, yxm, Pm, [23]uint8{0x14}},
   1691 	{AVERR, ydivl, Pm, [23]uint8{0x00, 04}},
   1692 	{AVERW, ydivl, Pm, [23]uint8{0x00, 05}},
   1693 	{AWAIT, ynone, Px, [23]uint8{0x9b}},
   1694 	{AWORD, ybyte, Px, [23]uint8{2}},
   1695 	{AXCHGB, yml_mb, Pb, [23]uint8{0x86, 0x86}},
   1696 	{AXCHGL, yxchg, Px, [23]uint8{0x90, 0x90, 0x87, 0x87}},
   1697 	{AXCHGQ, yxchg, Pw, [23]uint8{0x90, 0x90, 0x87, 0x87}},
   1698 	{AXCHGW, yxchg, Pe, [23]uint8{0x90, 0x90, 0x87, 0x87}},
   1699 	{AXLAT, ynone, Px, [23]uint8{0xd7}},
   1700 	{AXORB, yxorb, Pb, [23]uint8{0x34, 0x80, 06, 0x30, 0x32}},
   1701 	{AXORL, yaddl, Px, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
   1702 	{AXORPD, yxm, Pe, [23]uint8{0x57}},
   1703 	{AXORPS, yxm, Pm, [23]uint8{0x57}},
   1704 	{AXORQ, yaddl, Pw, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
   1705 	{AXORW, yaddl, Pe, [23]uint8{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
   1706 	{AFMOVB, yfmvx, Px, [23]uint8{0xdf, 04}},
   1707 	{AFMOVBP, yfmvp, Px, [23]uint8{0xdf, 06}},
   1708 	{AFMOVD, yfmvd, Px, [23]uint8{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
   1709 	{AFMOVDP, yfmvdp, Px, [23]uint8{0xdd, 03, 0xdd, 03}},
   1710 	{AFMOVF, yfmvf, Px, [23]uint8{0xd9, 00, 0xd9, 02}},
   1711 	{AFMOVFP, yfmvp, Px, [23]uint8{0xd9, 03}},
   1712 	{AFMOVL, yfmvf, Px, [23]uint8{0xdb, 00, 0xdb, 02}},
   1713 	{AFMOVLP, yfmvp, Px, [23]uint8{0xdb, 03}},
   1714 	{AFMOVV, yfmvx, Px, [23]uint8{0xdf, 05}},
   1715 	{AFMOVVP, yfmvp, Px, [23]uint8{0xdf, 07}},
   1716 	{AFMOVW, yfmvf, Px, [23]uint8{0xdf, 00, 0xdf, 02}},
   1717 	{AFMOVWP, yfmvp, Px, [23]uint8{0xdf, 03}},
   1718 	{AFMOVX, yfmvx, Px, [23]uint8{0xdb, 05}},
   1719 	{AFMOVXP, yfmvp, Px, [23]uint8{0xdb, 07}},
   1720 	{AFCMOVCC, yfcmv, Px, [23]uint8{0xdb, 00}},
   1721 	{AFCMOVCS, yfcmv, Px, [23]uint8{0xda, 00}},
   1722 	{AFCMOVEQ, yfcmv, Px, [23]uint8{0xda, 01}},
   1723 	{AFCMOVHI, yfcmv, Px, [23]uint8{0xdb, 02}},
   1724 	{AFCMOVLS, yfcmv, Px, [23]uint8{0xda, 02}},
   1725 	{AFCMOVNE, yfcmv, Px, [23]uint8{0xdb, 01}},
   1726 	{AFCMOVNU, yfcmv, Px, [23]uint8{0xdb, 03}},
   1727 	{AFCMOVUN, yfcmv, Px, [23]uint8{0xda, 03}},
   1728 	{AFCOMD, yfadd, Px, [23]uint8{0xdc, 02, 0xd8, 02, 0xdc, 02}},  /* botch */
   1729 	{AFCOMDP, yfadd, Px, [23]uint8{0xdc, 03, 0xd8, 03, 0xdc, 03}}, /* botch */
   1730 	{AFCOMDPP, ycompp, Px, [23]uint8{0xde, 03}},
   1731 	{AFCOMF, yfmvx, Px, [23]uint8{0xd8, 02}},
   1732 	{AFCOMFP, yfmvx, Px, [23]uint8{0xd8, 03}},
   1733 	{AFCOMI, yfmvx, Px, [23]uint8{0xdb, 06}},
   1734 	{AFCOMIP, yfmvx, Px, [23]uint8{0xdf, 06}},
   1735 	{AFCOML, yfmvx, Px, [23]uint8{0xda, 02}},
   1736 	{AFCOMLP, yfmvx, Px, [23]uint8{0xda, 03}},
   1737 	{AFCOMW, yfmvx, Px, [23]uint8{0xde, 02}},
   1738 	{AFCOMWP, yfmvx, Px, [23]uint8{0xde, 03}},
   1739 	{AFUCOM, ycompp, Px, [23]uint8{0xdd, 04}},
   1740 	{AFUCOMI, ycompp, Px, [23]uint8{0xdb, 05}},
   1741 	{AFUCOMIP, ycompp, Px, [23]uint8{0xdf, 05}},
   1742 	{AFUCOMP, ycompp, Px, [23]uint8{0xdd, 05}},
   1743 	{AFUCOMPP, ycompp, Px, [23]uint8{0xda, 13}},
   1744 	{AFADDDP, ycompp, Px, [23]uint8{0xde, 00}},
   1745 	{AFADDW, yfmvx, Px, [23]uint8{0xde, 00}},
   1746 	{AFADDL, yfmvx, Px, [23]uint8{0xda, 00}},
   1747 	{AFADDF, yfmvx, Px, [23]uint8{0xd8, 00}},
   1748 	{AFADDD, yfadd, Px, [23]uint8{0xdc, 00, 0xd8, 00, 0xdc, 00}},
   1749 	{AFMULDP, ycompp, Px, [23]uint8{0xde, 01}},
   1750 	{AFMULW, yfmvx, Px, [23]uint8{0xde, 01}},
   1751 	{AFMULL, yfmvx, Px, [23]uint8{0xda, 01}},
   1752 	{AFMULF, yfmvx, Px, [23]uint8{0xd8, 01}},
   1753 	{AFMULD, yfadd, Px, [23]uint8{0xdc, 01, 0xd8, 01, 0xdc, 01}},
   1754 	{AFSUBDP, ycompp, Px, [23]uint8{0xde, 05}},
   1755 	{AFSUBW, yfmvx, Px, [23]uint8{0xde, 04}},
   1756 	{AFSUBL, yfmvx, Px, [23]uint8{0xda, 04}},
   1757 	{AFSUBF, yfmvx, Px, [23]uint8{0xd8, 04}},
   1758 	{AFSUBD, yfadd, Px, [23]uint8{0xdc, 04, 0xd8, 04, 0xdc, 05}},
   1759 	{AFSUBRDP, ycompp, Px, [23]uint8{0xde, 04}},
   1760 	{AFSUBRW, yfmvx, Px, [23]uint8{0xde, 05}},
   1761 	{AFSUBRL, yfmvx, Px, [23]uint8{0xda, 05}},
   1762 	{AFSUBRF, yfmvx, Px, [23]uint8{0xd8, 05}},
   1763 	{AFSUBRD, yfadd, Px, [23]uint8{0xdc, 05, 0xd8, 05, 0xdc, 04}},
   1764 	{AFDIVDP, ycompp, Px, [23]uint8{0xde, 07}},
   1765 	{AFDIVW, yfmvx, Px, [23]uint8{0xde, 06}},
   1766 	{AFDIVL, yfmvx, Px, [23]uint8{0xda, 06}},
   1767 	{AFDIVF, yfmvx, Px, [23]uint8{0xd8, 06}},
   1768 	{AFDIVD, yfadd, Px, [23]uint8{0xdc, 06, 0xd8, 06, 0xdc, 07}},
   1769 	{AFDIVRDP, ycompp, Px, [23]uint8{0xde, 06}},
   1770 	{AFDIVRW, yfmvx, Px, [23]uint8{0xde, 07}},
   1771 	{AFDIVRL, yfmvx, Px, [23]uint8{0xda, 07}},
   1772 	{AFDIVRF, yfmvx, Px, [23]uint8{0xd8, 07}},
   1773 	{AFDIVRD, yfadd, Px, [23]uint8{0xdc, 07, 0xd8, 07, 0xdc, 06}},
   1774 	{AFXCHD, yfxch, Px, [23]uint8{0xd9, 01, 0xd9, 01}},
   1775 	{AFFREE, nil, 0, [23]uint8{}},
   1776 	{AFLDCW, ysvrs_mo, Px, [23]uint8{0xd9, 05, 0xd9, 05}},
   1777 	{AFLDENV, ysvrs_mo, Px, [23]uint8{0xd9, 04, 0xd9, 04}},
   1778 	{AFRSTOR, ysvrs_mo, Px, [23]uint8{0xdd, 04, 0xdd, 04}},
   1779 	{AFSAVE, ysvrs_om, Px, [23]uint8{0xdd, 06, 0xdd, 06}},
   1780 	{AFSTCW, ysvrs_om, Px, [23]uint8{0xd9, 07, 0xd9, 07}},
   1781 	{AFSTENV, ysvrs_om, Px, [23]uint8{0xd9, 06, 0xd9, 06}},
   1782 	{AFSTSW, ystsw, Px, [23]uint8{0xdd, 07, 0xdf, 0xe0}},
   1783 	{AF2XM1, ynone, Px, [23]uint8{0xd9, 0xf0}},
   1784 	{AFABS, ynone, Px, [23]uint8{0xd9, 0xe1}},
   1785 	{AFCHS, ynone, Px, [23]uint8{0xd9, 0xe0}},
   1786 	{AFCLEX, ynone, Px, [23]uint8{0xdb, 0xe2}},
   1787 	{AFCOS, ynone, Px, [23]uint8{0xd9, 0xff}},
   1788 	{AFDECSTP, ynone, Px, [23]uint8{0xd9, 0xf6}},
   1789 	{AFINCSTP, ynone, Px, [23]uint8{0xd9, 0xf7}},
   1790 	{AFINIT, ynone, Px, [23]uint8{0xdb, 0xe3}},
   1791 	{AFLD1, ynone, Px, [23]uint8{0xd9, 0xe8}},
   1792 	{AFLDL2E, ynone, Px, [23]uint8{0xd9, 0xea}},
   1793 	{AFLDL2T, ynone, Px, [23]uint8{0xd9, 0xe9}},
   1794 	{AFLDLG2, ynone, Px, [23]uint8{0xd9, 0xec}},
   1795 	{AFLDLN2, ynone, Px, [23]uint8{0xd9, 0xed}},
   1796 	{AFLDPI, ynone, Px, [23]uint8{0xd9, 0xeb}},
   1797 	{AFLDZ, ynone, Px, [23]uint8{0xd9, 0xee}},
   1798 	{AFNOP, ynone, Px, [23]uint8{0xd9, 0xd0}},
   1799 	{AFPATAN, ynone, Px, [23]uint8{0xd9, 0xf3}},
   1800 	{AFPREM, ynone, Px, [23]uint8{0xd9, 0xf8}},
   1801 	{AFPREM1, ynone, Px, [23]uint8{0xd9, 0xf5}},
   1802 	{AFPTAN, ynone, Px, [23]uint8{0xd9, 0xf2}},
   1803 	{AFRNDINT, ynone, Px, [23]uint8{0xd9, 0xfc}},
   1804 	{AFSCALE, ynone, Px, [23]uint8{0xd9, 0xfd}},
   1805 	{AFSIN, ynone, Px, [23]uint8{0xd9, 0xfe}},
   1806 	{AFSINCOS, ynone, Px, [23]uint8{0xd9, 0xfb}},
   1807 	{AFSQRT, ynone, Px, [23]uint8{0xd9, 0xfa}},
   1808 	{AFTST, ynone, Px, [23]uint8{0xd9, 0xe4}},
   1809 	{AFXAM, ynone, Px, [23]uint8{0xd9, 0xe5}},
   1810 	{AFXTRACT, ynone, Px, [23]uint8{0xd9, 0xf4}},
   1811 	{AFYL2X, ynone, Px, [23]uint8{0xd9, 0xf1}},
   1812 	{AFYL2XP1, ynone, Px, [23]uint8{0xd9, 0xf9}},
   1813 	{ACMPXCHGB, yrb_mb, Pb, [23]uint8{0x0f, 0xb0}},
   1814 	{ACMPXCHGL, yrl_ml, Px, [23]uint8{0x0f, 0xb1}},
   1815 	{ACMPXCHGW, yrl_ml, Pe, [23]uint8{0x0f, 0xb1}},
   1816 	{ACMPXCHGQ, yrl_ml, Pw, [23]uint8{0x0f, 0xb1}},
   1817 	{ACMPXCHG8B, yscond, Pm, [23]uint8{0xc7, 01}},
   1818 	{AINVD, ynone, Pm, [23]uint8{0x08}},
   1819 	{AINVLPG, ydivb, Pm, [23]uint8{0x01, 07}},
   1820 	{ALFENCE, ynone, Pm, [23]uint8{0xae, 0xe8}},
   1821 	{AMFENCE, ynone, Pm, [23]uint8{0xae, 0xf0}},
   1822 	{AMOVNTIL, yrl_ml, Pm, [23]uint8{0xc3}},
   1823 	{AMOVNTIQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc3}},
   1824 	{ARDMSR, ynone, Pm, [23]uint8{0x32}},
   1825 	{ARDPMC, ynone, Pm, [23]uint8{0x33}},
   1826 	{ARDTSC, ynone, Pm, [23]uint8{0x31}},
   1827 	{ARSM, ynone, Pm, [23]uint8{0xaa}},
   1828 	{ASFENCE, ynone, Pm, [23]uint8{0xae, 0xf8}},
   1829 	{ASYSRET, ynone, Pm, [23]uint8{0x07}},
   1830 	{AWBINVD, ynone, Pm, [23]uint8{0x09}},
   1831 	{AWRMSR, ynone, Pm, [23]uint8{0x30}},
   1832 	{AXADDB, yrb_mb, Pb, [23]uint8{0x0f, 0xc0}},
   1833 	{AXADDL, yrl_ml, Px, [23]uint8{0x0f, 0xc1}},
   1834 	{AXADDQ, yrl_ml, Pw, [23]uint8{0x0f, 0xc1}},
   1835 	{AXADDW, yrl_ml, Pe, [23]uint8{0x0f, 0xc1}},
   1836 	{ACRC32B, ycrc32l, Px, [23]uint8{0xf2, 0x0f, 0x38, 0xf0, 0}},
   1837 	{ACRC32Q, ycrc32l, Pw, [23]uint8{0xf2, 0x0f, 0x38, 0xf1, 0}},
   1838 	{APREFETCHT0, yprefetch, Pm, [23]uint8{0x18, 01}},
   1839 	{APREFETCHT1, yprefetch, Pm, [23]uint8{0x18, 02}},
   1840 	{APREFETCHT2, yprefetch, Pm, [23]uint8{0x18, 03}},
   1841 	{APREFETCHNTA, yprefetch, Pm, [23]uint8{0x18, 00}},
   1842 	{AMOVQL, yrl_ml, Px, [23]uint8{0x89}},
   1843 	{obj.AUNDEF, ynone, Px, [23]uint8{0x0f, 0x0b}},
   1844 	{AAESENC, yaes, Pq, [23]uint8{0x38, 0xdc, 0}},
   1845 	{AAESENCLAST, yaes, Pq, [23]uint8{0x38, 0xdd, 0}},
   1846 	{AAESDEC, yaes, Pq, [23]uint8{0x38, 0xde, 0}},
   1847 	{AAESDECLAST, yaes, Pq, [23]uint8{0x38, 0xdf, 0}},
   1848 	{AAESIMC, yaes, Pq, [23]uint8{0x38, 0xdb, 0}},
   1849 	{AAESKEYGENASSIST, yxshuf, Pq, [23]uint8{0x3a, 0xdf, 0}},
   1850 	{AROUNDPD, yxshuf, Pq, [23]uint8{0x3a, 0x09, 0}},
   1851 	{AROUNDPS, yxshuf, Pq, [23]uint8{0x3a, 0x08, 0}},
   1852 	{AROUNDSD, yxshuf, Pq, [23]uint8{0x3a, 0x0b, 0}},
   1853 	{AROUNDSS, yxshuf, Pq, [23]uint8{0x3a, 0x0a, 0}},
   1854 	{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
   1855 	{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
   1856 	{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
   1857 	{APCMPESTRM, yxshuf, Pq, [23]uint8{0x3a, 0x60, 0}},
   1858 	{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
   1859 	{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
   1860 	{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
   1861 
   1862 	{ABLENDPD, yxshuf, Pq, [23]uint8{0x3a, 0x0d, 0}},
   1863 	{ABLENDPS, yxshuf, Pq, [23]uint8{0x3a, 0x0c, 0}},
   1864 	{AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
   1865 	{AXRELEASE, ynone, Px, [23]uint8{0xf3}},
   1866 	{AXBEGIN, yxbegin, Px, [23]uint8{0xc7, 0xf8}},
   1867 	{AXABORT, yxabort, Px, [23]uint8{0xc6, 0xf8}},
   1868 	{AXEND, ynone, Px, [23]uint8{0x0f, 01, 0xd5}},
   1869 	{AXTEST, ynone, Px, [23]uint8{0x0f, 01, 0xd6}},
   1870 	{AXGETBV, ynone, Pm, [23]uint8{01, 0xd0}},
   1871 	{obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
   1872 	{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
   1873 	{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
   1874 	{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
   1875 
   1876 	// AVX2 gather instructions.
   1877 	// Added as a part of VSIB support implementation,
   1878 	// when x86avxgen will output these, they will be moved to
   1879 	// vex_optabs.go where they belong.
   1880 	{AVGATHERDPD, yvpgatherdq, Pvex, [23]uint8{
   1881 		vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x92,
   1882 		vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x92,
   1883 	}},
   1884 	{AVGATHERQPD, yvpgatherqq, Pvex, [23]uint8{
   1885 		vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x93,
   1886 		vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x93,
   1887 	}},
   1888 	{AVGATHERDPS, yvpgatherqq, Pvex, [23]uint8{
   1889 		vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x92,
   1890 		vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x92,
   1891 	}},
   1892 	{AVGATHERQPS, yvgatherqps, Pvex, [23]uint8{
   1893 		vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x93,
   1894 		vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x93,
   1895 	}},
   1896 	{AVPGATHERDD, yvpgatherqq, Pvex, [23]uint8{
   1897 		vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x90,
   1898 		vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x90,
   1899 	}},
   1900 	{AVPGATHERQD, yvgatherqps, Pvex, [23]uint8{
   1901 		vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x91,
   1902 		vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x91,
   1903 	}},
   1904 	{AVPGATHERDQ, yvpgatherdq, Pvex, [23]uint8{
   1905 		vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x90,
   1906 		vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x90,
   1907 	}},
   1908 	{AVPGATHERQQ, yvpgatherqq, Pvex, [23]uint8{
   1909 		vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x91,
   1910 		vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x91,
   1911 	}},
   1912 
   1913 	{obj.AEND, nil, 0, [23]uint8{}},
   1914 	{0, nil, 0, [23]uint8{}},
   1915 }
   1916 
// opindex maps an instruction opcode (masked with obj.AMask) to its
// encoding-table entry. It is filled in by instinit from vexOptab and optab;
// a slot that is already occupied when instinit writes it is reported as a
// "phase error".
var opindex [(ALAST + 1) & obj.AMask]*Optab
   1918 
   1919 // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
   1920 // This happens on systems like Solaris that call .so functions instead of system calls.
   1921 // It does not seem to be necessary for any other systems. This is probably working
   1922 // around a Solaris-specific bug that should be fixed differently, but we don't know
   1923 // what that bug is. And this does fix it.
   1924 func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
   1925 	if ctxt.Headtype == objabi.Hsolaris {
   1926 		// All the Solaris dynamic imports from libc.so begin with "libc_".
   1927 		return strings.HasPrefix(s.Name, "libc_")
   1928 	}
   1929 	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
   1930 }
   1931 
   1932 // single-instruction no-ops of various lengths.
   1933 // constructed by hand and disassembled with gdb to verify.
   1934 // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
   1935 var nop = [][16]uint8{
   1936 	{0x90},
   1937 	{0x66, 0x90},
   1938 	{0x0F, 0x1F, 0x00},
   1939 	{0x0F, 0x1F, 0x40, 0x00},
   1940 	{0x0F, 0x1F, 0x44, 0x00, 0x00},
   1941 	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
   1942 	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
   1943 	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   1944 	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   1945 }
   1946 
   1947 // Native Client rejects the repeated 0x66 prefix.
   1948 // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   1949 func fillnop(p []byte, n int) {
   1950 	var m int
   1951 
   1952 	for n > 0 {
   1953 		m = n
   1954 		if m > len(nop) {
   1955 			m = len(nop)
   1956 		}
   1957 		copy(p[:m], nop[m-1][:m])
   1958 		p = p[m:]
   1959 		n -= m
   1960 	}
   1961 }
   1962 
   1963 func naclpad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
   1964 	s.Grow(int64(c) + int64(pad))
   1965 	fillnop(s.P[c:], int(pad))
   1966 	return c + pad
   1967 }
   1968 
   1969 func spadjop(ctxt *obj.Link, p *obj.Prog, l, q obj.As) obj.As {
   1970 	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
   1971 		return l
   1972 	}
   1973 	return q
   1974 }
   1975 
   1976 func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
   1977 	if s.P != nil {
   1978 		return
   1979 	}
   1980 
   1981 	if ycover[0] == 0 {
   1982 		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
   1983 	}
   1984 
   1985 	var asmbuf AsmBuf
   1986 
   1987 	for p := s.Func.Text; p != nil; p = p.Link {
   1988 		if p.To.Type == obj.TYPE_BRANCH {
   1989 			if p.Pcond == nil {
   1990 				p.Pcond = p
   1991 			}
   1992 		}
   1993 		if p.As == AADJSP {
   1994 			p.To.Type = obj.TYPE_REG
   1995 			p.To.Reg = REG_SP
   1996 			v := int32(-p.From.Offset)
   1997 			p.From.Offset = int64(v)
   1998 			p.As = spadjop(ctxt, p, AADDL, AADDQ)
   1999 			if v < 0 {
   2000 				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
   2001 				v = -v
   2002 				p.From.Offset = int64(v)
   2003 			}
   2004 
   2005 			if v == 0 {
   2006 				p.As = obj.ANOP
   2007 			}
   2008 		}
   2009 	}
   2010 
   2011 	var q *obj.Prog
   2012 	var count int64 // rough count of number of instructions
   2013 	for p := s.Func.Text; p != nil; p = p.Link {
   2014 		count++
   2015 		p.Back = 2 // use short branches first time through
   2016 		q = p.Pcond
   2017 		if q != nil && (q.Back&2 != 0) {
   2018 			p.Back |= 1 // backward jump
   2019 			q.Back |= 4 // loop head
   2020 		}
   2021 
   2022 		if p.As == AADJSP {
   2023 			p.To.Type = obj.TYPE_REG
   2024 			p.To.Reg = REG_SP
   2025 			v := int32(-p.From.Offset)
   2026 			p.From.Offset = int64(v)
   2027 			p.As = spadjop(ctxt, p, AADDL, AADDQ)
   2028 			if v < 0 {
   2029 				p.As = spadjop(ctxt, p, ASUBL, ASUBQ)
   2030 				v = -v
   2031 				p.From.Offset = int64(v)
   2032 			}
   2033 
   2034 			if v == 0 {
   2035 				p.As = obj.ANOP
   2036 			}
   2037 		}
   2038 	}
   2039 	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction
   2040 
   2041 	n := 0
   2042 	var c int32
   2043 	errors := ctxt.Errors
   2044 	for {
   2045 		loop := int32(0)
   2046 		for i := range s.R {
   2047 			s.R[i] = obj.Reloc{}
   2048 		}
   2049 		s.R = s.R[:0]
   2050 		s.P = s.P[:0]
   2051 		c = 0
   2052 		for p := s.Func.Text; p != nil; p = p.Link {
   2053 			if ctxt.Headtype == objabi.Hnacl && p.Isize > 0 {
   2054 
   2055 				// pad everything to avoid crossing 32-byte boundary
   2056 				if c>>5 != (c+int32(p.Isize)-1)>>5 {
   2057 					c = naclpad(ctxt, s, c, -c&31)
   2058 				}
   2059 
   2060 				// pad call deferreturn to start at 32-byte boundary
   2061 				// so that subtracting 5 in jmpdefer will jump back
   2062 				// to that boundary and rerun the call.
   2063 				if p.As == obj.ACALL && p.To.Sym == deferreturn {
   2064 					c = naclpad(ctxt, s, c, -c&31)
   2065 				}
   2066 
   2067 				// pad call to end at 32-byte boundary
   2068 				if p.As == obj.ACALL {
   2069 					c = naclpad(ctxt, s, c, -(c+int32(p.Isize))&31)
   2070 				}
   2071 
   2072 				// the linker treats REP and STOSQ as different instructions
   2073 				// but in fact the REP is a prefix on the STOSQ.
   2074 				// make sure REP has room for 2 more bytes, so that
   2075 				// padding will not be inserted before the next instruction.
   2076 				if (p.As == AREP || p.As == AREPN) && c>>5 != (c+3-1)>>5 {
   2077 					c = naclpad(ctxt, s, c, -c&31)
   2078 				}
   2079 
   2080 				// same for LOCK.
   2081 				// various instructions follow; the longest is 4 bytes.
   2082 				// give ourselves 8 bytes so as to avoid surprises.
   2083 				if p.As == ALOCK && c>>5 != (c+8-1)>>5 {
   2084 					c = naclpad(ctxt, s, c, -c&31)
   2085 				}
   2086 			}
   2087 
   2088 			if (p.Back&4 != 0) && c&(LoopAlign-1) != 0 {
   2089 				// pad with NOPs
   2090 				v := -c & (LoopAlign - 1)
   2091 
   2092 				if v <= MaxLoopPad {
   2093 					s.Grow(int64(c) + int64(v))
   2094 					fillnop(s.P[c:], int(v))
   2095 					c += v
   2096 				}
   2097 			}
   2098 
   2099 			p.Pc = int64(c)
   2100 
   2101 			// process forward jumps to p
   2102 			for q = p.Rel; q != nil; q = q.Forwd {
   2103 				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
   2104 				if q.Back&2 != 0 { // short
   2105 					if v > 127 {
   2106 						loop++
   2107 						q.Back ^= 2
   2108 					}
   2109 
   2110 					if q.As == AJCXZL || q.As == AXBEGIN {
   2111 						s.P[q.Pc+2] = byte(v)
   2112 					} else {
   2113 						s.P[q.Pc+1] = byte(v)
   2114 					}
   2115 				} else {
   2116 					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
   2117 				}
   2118 			}
   2119 
   2120 			p.Rel = nil
   2121 
   2122 			p.Pc = int64(c)
   2123 			asmbuf.asmins(ctxt, s, p)
   2124 			m := asmbuf.Len()
   2125 			if int(p.Isize) != m {
   2126 				p.Isize = uint8(m)
   2127 				loop++
   2128 			}
   2129 
   2130 			s.Grow(p.Pc + int64(m))
   2131 			copy(s.P[p.Pc:], asmbuf.Bytes())
   2132 			c += int32(m)
   2133 		}
   2134 
   2135 		n++
   2136 		if n > 20 {
   2137 			ctxt.Diag("span must be looping")
   2138 			log.Fatalf("loop")
   2139 		}
   2140 		if loop == 0 {
   2141 			break
   2142 		}
   2143 		if ctxt.Errors > errors {
   2144 			return
   2145 		}
   2146 	}
   2147 
   2148 	if ctxt.Headtype == objabi.Hnacl {
   2149 		c = naclpad(ctxt, s, c, -c&31)
   2150 	}
   2151 
   2152 	s.Size = int64(c)
   2153 
   2154 	if false { /* debug['a'] > 1 */
   2155 		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
   2156 		var i int
   2157 		for i = 0; i < len(s.P); i++ {
   2158 			fmt.Printf(" %.2x", s.P[i])
   2159 			if i%16 == 15 {
   2160 				fmt.Printf("\n  %.6x", uint(i+1))
   2161 			}
   2162 		}
   2163 
   2164 		if i%16 != 0 {
   2165 			fmt.Printf("\n")
   2166 		}
   2167 
   2168 		for i := 0; i < len(s.R); i++ {
   2169 			r := &s.R[i]
   2170 			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
   2171 		}
   2172 	}
   2173 }
   2174 
// instinit builds the package-level lookup tables used by the x86 assembler:
//
//   - plan9privates / deferreturn: symbols looked up for Plan 9 and NaCl targets;
//   - opindex: opcode -> encoding-table entry, from vexOptab and optab;
//   - ycover: a Ymax x Ymax matrix of operand-class relations, stored row-major;
//   - reg / regrex: per-register encoding number and prefix bits.
//
// The function is idempotent: it returns immediately when ycover[0] is
// already set, which is the first thing the table setup below establishes.
func instinit(ctxt *obj.Link) {
	if ycover[0] != 0 {
		// Already initialized; stop now.
		// This happens in the cmd/asm tests,
		// each of which re-initializes the arch.
		return
	}

	switch ctxt.Headtype {
	case objabi.Hplan9:
		plan9privates = ctxt.Lookup("_privates")
	case objabi.Hnacl:
		deferreturn = ctxt.Lookup("runtime.deferreturn")
	}

	// Index both encoding tables by opcode. An opcode that appears twice
	// (within or across the tables) is reported as a phase error; the later
	// entry still overwrites the earlier one.
	for i := range vexOptab {
		c := vexOptab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in vexOptab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &vexOptab[i]
	}
	// optab is scanned from index 1 up to the first entry whose opcode is 0
	// (the table ends with such a sentinel); entry 0 is skipped.
	for i := 1; optab[i].as != 0; i++ {
		c := optab[i].as
		if opindex[c&obj.AMask] != nil {
			ctxt.Diag("phase error in optab: %d (%v)", i, c)
		}
		opindex[c&obj.AMask] = &optab[i]
	}

	// Every operand class is related to itself.
	for i := 0; i < Ymax; i++ {
		ycover[i*Ymax+i] = 1
	}

	// NOTE(review): the entries below read as "ycover[a*Ymax+b] = 1 means an
	// operand of class a is acceptable where class b is asked for" (narrower
	// immediates fit wider ones, specific registers fit register classes,
	// registers and memory fit reg-or-mem classes). The consumer of ycover is
	// outside this function -- confirm there.

	// Immediate classes, from narrowest to widest.
	ycover[Yi0*Ymax+Yu2] = 1
	ycover[Yi1*Ymax+Yu2] = 1

	ycover[Yi0*Ymax+Yi8] = 1
	ycover[Yi1*Ymax+Yi8] = 1
	ycover[Yu2*Ymax+Yi8] = 1
	ycover[Yu7*Ymax+Yi8] = 1

	ycover[Yi0*Ymax+Yu7] = 1
	ycover[Yi1*Ymax+Yu7] = 1
	ycover[Yu2*Ymax+Yu7] = 1

	ycover[Yi0*Ymax+Yu8] = 1
	ycover[Yi1*Ymax+Yu8] = 1
	ycover[Yu2*Ymax+Yu8] = 1
	ycover[Yu7*Ymax+Yu8] = 1

	ycover[Yi0*Ymax+Ys32] = 1
	ycover[Yi1*Ymax+Ys32] = 1
	ycover[Yu2*Ymax+Ys32] = 1
	ycover[Yu7*Ymax+Ys32] = 1
	ycover[Yu8*Ymax+Ys32] = 1
	ycover[Yi8*Ymax+Ys32] = 1

	ycover[Yi0*Ymax+Yi32] = 1
	ycover[Yi1*Ymax+Yi32] = 1
	ycover[Yu2*Ymax+Yi32] = 1
	ycover[Yu7*Ymax+Yi32] = 1
	ycover[Yu8*Ymax+Yi32] = 1
	ycover[Yi8*Ymax+Yi32] = 1
	ycover[Ys32*Ymax+Yi32] = 1

	ycover[Yi0*Ymax+Yi64] = 1
	ycover[Yi1*Ymax+Yi64] = 1
	ycover[Yu7*Ymax+Yi64] = 1
	ycover[Yu2*Ymax+Yi64] = 1
	ycover[Yu8*Ymax+Yi64] = 1
	ycover[Yi8*Ymax+Yi64] = 1
	ycover[Ys32*Ymax+Yi64] = 1
	ycover[Yi32*Ymax+Yi64] = 1

	// Byte-register class.
	ycover[Yal*Ymax+Yrb] = 1
	ycover[Ycl*Ymax+Yrb] = 1
	ycover[Yax*Ymax+Yrb] = 1
	ycover[Ycx*Ymax+Yrb] = 1
	ycover[Yrx*Ymax+Yrb] = 1
	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32

	ycover[Ycl*Ymax+Ycx] = 1

	ycover[Yax*Ymax+Yrx] = 1
	ycover[Ycx*Ymax+Yrx] = 1

	// General (long) register class.
	ycover[Yax*Ymax+Yrl] = 1
	ycover[Ycx*Ymax+Yrl] = 1
	ycover[Yrx*Ymax+Yrl] = 1
	ycover[Yrl32*Ymax+Yrl] = 1

	ycover[Yf0*Ymax+Yrf] = 1

	// Byte register-or-memory class.
	ycover[Yal*Ymax+Ymb] = 1
	ycover[Ycl*Ymax+Ymb] = 1
	ycover[Yax*Ymax+Ymb] = 1
	ycover[Ycx*Ymax+Ymb] = 1
	ycover[Yrx*Ymax+Ymb] = 1
	ycover[Yrb*Ymax+Ymb] = 1
	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
	ycover[Ym*Ymax+Ymb] = 1

	// Long register-or-memory class.
	ycover[Yax*Ymax+Yml] = 1
	ycover[Ycx*Ymax+Yml] = 1
	ycover[Yrx*Ymax+Yml] = 1
	ycover[Yrl*Ymax+Yml] = 1
	ycover[Yrl32*Ymax+Yml] = 1
	ycover[Ym*Ymax+Yml] = 1

	// Ymm: same members as Yml plus Ymr.
	ycover[Yax*Ymax+Ymm] = 1
	ycover[Ycx*Ymax+Ymm] = 1
	ycover[Yrx*Ymax+Ymm] = 1
	ycover[Yrl*Ymax+Ymm] = 1
	ycover[Yrl32*Ymax+Ymm] = 1
	ycover[Ym*Ymax+Ymm] = 1
	ycover[Ymr*Ymax+Ymm] = 1

	// Yxm: memory or Yxr.
	ycover[Ym*Ymax+Yxm] = 1
	ycover[Yxr*Ymax+Yxm] = 1

	// Yym: memory or Yyr.
	ycover[Ym*Ymax+Yym] = 1
	ycover[Yyr*Ymax+Yym] = 1

	// Per-register tables. reg[i] is the register's 3-bit number within its
	// group, or -1 when i falls in none of the ranges below; regrex[i] holds
	// the extra prefix bits the register requires.
	// NOTE(review): Rxr/Rxx/Rxb look like the REX.R/X/B extension bits and
	// 0x40 like a bare REX prefix -- confirm against their definitions.
	for i := 0; i < MAXREG; i++ {
		reg[i] = -1
		if i >= REG_AL && i <= REG_R15B {
			reg[i] = (i - REG_AL) & 7
			if i >= REG_SPB && i <= REG_DIB {
				regrex[i] = 0x40
			}
			if i >= REG_R8B && i <= REG_R15B {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// The high-byte registers AH..BH are numbered starting at 4.
		if i >= REG_AH && i <= REG_BH {
			reg[i] = 4 + ((i - REG_AH) & 7)
		}
		if i >= REG_AX && i <= REG_R15 {
			reg[i] = (i - REG_AX) & 7
			if i >= REG_R8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		if i >= REG_F0 && i <= REG_F0+7 {
			reg[i] = (i - REG_F0) & 7
		}
		if i >= REG_M0 && i <= REG_M0+7 {
			reg[i] = (i - REG_M0) & 7
		}
		if i >= REG_X0 && i <= REG_X0+15 {
			reg[i] = (i - REG_X0) & 7
			if i >= REG_X0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}
		if i >= REG_Y0 && i <= REG_Y0+15 {
			reg[i] = (i - REG_Y0) & 7
			if i >= REG_Y0+8 {
				regrex[i] = Rxr | Rxx | Rxb
			}
		}

		// The upper eight control registers only get the Rxr bit; reg[i] is
		// not assigned for them here.
		if i >= REG_CR+8 && i <= REG_CR+15 {
			regrex[i] = Rxr
		}
	}
}
   2345 
// isAndroid records whether objabi.GOOS is "android". prefixof consults it
// when choosing the TLS segment register.
var isAndroid = (objabi.GOOS == "android")
   2347 
   2348 func prefixof(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
   2349 	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
   2350 		return 0
   2351 	}
   2352 	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
   2353 		switch a.Reg {
   2354 		case REG_CS:
   2355 			return 0x2e
   2356 
   2357 		case REG_DS:
   2358 			return 0x3e
   2359 
   2360 		case REG_ES:
   2361 			return 0x26
   2362 
   2363 		case REG_FS:
   2364 			return 0x64
   2365 
   2366 		case REG_GS:
   2367 			return 0x65
   2368 
   2369 		case REG_TLS:
   2370 			// NOTE: Systems listed here should be only systems that
   2371 			// support direct TLS references like 8(TLS) implemented as
   2372 			// direct references from FS or GS. Systems that require
   2373 			// the initial-exec model, where you load the TLS base into
   2374 			// a register and then index from that register, do not reach
   2375 			// this code and should not be listed.
   2376 			if ctxt.Arch.Family == sys.I386 {
   2377 				switch ctxt.Headtype {
   2378 				default:
   2379 					if isAndroid {
   2380 						return 0x65 // GS
   2381 					}
   2382 					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
   2383 
   2384 				case objabi.Hdarwin,
   2385 					objabi.Hdragonfly,
   2386 					objabi.Hfreebsd,
   2387 					objabi.Hnetbsd,
   2388 					objabi.Hopenbsd:
   2389 					return 0x65 // GS
   2390 				}
   2391 			}
   2392 
   2393 			switch ctxt.Headtype {
   2394 			default:
   2395 				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
   2396 
   2397 			case objabi.Hlinux:
   2398 				if isAndroid {
   2399 					return 0x64 // FS
   2400 				}
   2401 
   2402 				if ctxt.Flag_shared {
   2403 					log.Fatalf("unknown TLS base register for linux with -shared")
   2404 				} else {
   2405 					return 0x64 // FS
   2406 				}
   2407 
   2408 			case objabi.Hdragonfly,
   2409 				objabi.Hfreebsd,
   2410 				objabi.Hnetbsd,
   2411 				objabi.Hopenbsd,
   2412 				objabi.Hsolaris:
   2413 				return 0x64 // FS
   2414 
   2415 			case objabi.Hdarwin:
   2416 				return 0x65 // GS
   2417 			}
   2418 		}
   2419 	}
   2420 
   2421 	if ctxt.Arch.Family == sys.I386 {
   2422 		if a.Index == REG_TLS && ctxt.Flag_shared {
   2423 			// When building for inclusion into a shared library, an instruction of the form
   2424 			//     MOVL 0(CX)(TLS*1), AX
   2425 			// becomes
   2426 			//     mov %gs:(%ecx), %eax
   2427 			// which assumes that the correct TLS offset has been loaded into %ecx (today
   2428 			// there is only one TLS variable -- g -- so this is OK). When not building for
   2429 			// a shared library the instruction it becomes
   2430 			//     mov 0x0(%ecx), $eax
   2431 			// and a R_TLS_LE relocation, and so does not require a prefix.
   2432 			if a.Offset != 0 {
   2433 				ctxt.Diag("cannot handle non-0 offsets to TLS")
   2434 			}
   2435 			return 0x65 // GS
   2436 		}
   2437 		return 0
   2438 	}
   2439 
   2440 	switch a.Index {
   2441 	case REG_CS:
   2442 		return 0x2e
   2443 
   2444 	case REG_DS:
   2445 		return 0x3e
   2446 
   2447 	case REG_ES:
   2448 		return 0x26
   2449 
   2450 	case REG_TLS:
   2451 		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
   2452 			// When building for inclusion into a shared library, an instruction of the form
   2453 			//     MOV 0(CX)(TLS*1), AX
   2454 			// becomes
   2455 			//     mov %fs:(%rcx), %rax
   2456 			// which assumes that the correct TLS offset has been loaded into %rcx (today
   2457 			// there is only one TLS variable -- g -- so this is OK). When not building for
   2458 			// a shared library the instruction does not require a prefix.
   2459 			if a.Offset != 0 {
   2460 				log.Fatalf("cannot handle non-0 offsets to TLS")
   2461 			}
   2462 			return 0x64
   2463 		}
   2464 
   2465 	case REG_FS:
   2466 		return 0x64
   2467 
   2468 	case REG_GS:
   2469 		return 0x65
   2470 	}
   2471 
   2472 	return 0
   2473 }
   2474 
   2475 func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
   2476 	switch a.Type {
   2477 	case obj.TYPE_NONE:
   2478 		return Ynone
   2479 
   2480 	case obj.TYPE_BRANCH:
   2481 		return Ybr
   2482 
   2483 	case obj.TYPE_INDIR:
   2484 		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
   2485 			return Yindir
   2486 		}
   2487 		return Yxxx
   2488 
   2489 	case obj.TYPE_MEM:
   2490 		if a.Index == REG_SP {
   2491 			// Can't use SP as the index register
   2492 			return Yxxx
   2493 		}
   2494 		if a.Index >= REG_X0 && a.Index <= REG_X15 {
   2495 			if ctxt.Arch.Family == sys.I386 && a.Index > REG_X7 {
   2496 				return Yxxx
   2497 			}
   2498 			return Yxvm
   2499 		}
   2500 		if a.Index >= REG_Y0 && a.Index <= REG_Y15 {
   2501 			if ctxt.Arch.Family == sys.I386 && a.Index > REG_Y7 {
   2502 				return Yxxx
   2503 			}
   2504 			return Yyvm
   2505 		}
   2506 		if ctxt.Arch.Family == sys.AMD64 {
   2507 			// Offset must fit in a 32-bit signed field (or fit in a 32-bit unsigned field
   2508 			// where the sign extension doesn't matter).
   2509 			// Note: The latter happens only in assembly, for example crypto/sha1/sha1block_amd64.s.
   2510 			if !(a.Offset == int64(int32(a.Offset)) ||
   2511 				a.Offset == int64(uint32(a.Offset)) && p.As == ALEAL) {
   2512 				return Yxxx
   2513 			}
   2514 			switch a.Name {
   2515 			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
   2516 				// Global variables can't use index registers and their
   2517 				// base register is %rip (%rip is encoded as REG_NONE).
   2518 				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
   2519 					return Yxxx
   2520 				}
   2521 			case obj.NAME_AUTO, obj.NAME_PARAM:
   2522 				// These names must have a base of SP.  The old compiler
   2523 				// uses 0 for the base register. SSA uses REG_SP.
   2524 				if a.Reg != REG_SP && a.Reg != 0 {
   2525 					return Yxxx
   2526 				}
   2527 			case obj.NAME_NONE:
   2528 				// everything is ok
   2529 			default:
   2530 				// unknown name
   2531 				return Yxxx
   2532 			}
   2533 		}
   2534 		return Ym
   2535 
   2536 	case obj.TYPE_ADDR:
   2537 		switch a.Name {
   2538 		case obj.NAME_GOTREF:
   2539 			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
   2540 			return Yxxx
   2541 
   2542 		case obj.NAME_EXTERN,
   2543 			obj.NAME_STATIC:
   2544 			if a.Sym != nil && useAbs(ctxt, a.Sym) {
   2545 				return Yi32
   2546 			}
   2547 			return Yiauto // use pc-relative addressing
   2548 
   2549 		case obj.NAME_AUTO,
   2550 			obj.NAME_PARAM:
   2551 			return Yiauto
   2552 		}
   2553 
   2554 		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
   2555 		// and got Yi32 in an earlier version of this code.
   2556 		// Keep doing that until we fix yduff etc.
   2557 		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
   2558 			return Yi32
   2559 		}
   2560 
   2561 		if a.Sym != nil || a.Name != obj.NAME_NONE {
   2562 			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
   2563 		}
   2564 		fallthrough
   2565 
   2566 		// fall through
   2567 
   2568 	case obj.TYPE_CONST:
   2569 		if a.Sym != nil {
   2570 			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
   2571 		}
   2572 
   2573 		v := a.Offset
   2574 		if ctxt.Arch.Family == sys.I386 {
   2575 			v = int64(int32(v))
   2576 		}
   2577 		if v == 0 {
   2578 			return Yi0
   2579 		}
   2580 		if v == 1 {
   2581 			return Yi1
   2582 		}
   2583 		if v >= 0 && v <= 3 {
   2584 			return Yu2
   2585 		}
   2586 		if v >= 0 && v <= 127 {
   2587 			return Yu7
   2588 		}
   2589 		if v >= 0 && v <= 255 {
   2590 			return Yu8
   2591 		}
   2592 		if v >= -128 && v <= 127 {
   2593 			return Yi8
   2594 		}
   2595 		if ctxt.Arch.Family == sys.I386 {
   2596 			return Yi32
   2597 		}
   2598 		l := int32(v)
   2599 		if int64(l) == v {
   2600 			return Ys32 /* can sign extend */
   2601 		}
   2602 		if v>>32 == 0 {
   2603 			return Yi32 /* unsigned */
   2604 		}
   2605 		return Yi64
   2606 
   2607 	case obj.TYPE_TEXTSIZE:
   2608 		return Ytextsize
   2609 	}
   2610 
   2611 	if a.Type != obj.TYPE_REG {
   2612 		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
   2613 		return Yxxx
   2614 	}
   2615 
   2616 	switch a.Reg {
   2617 	case REG_AL:
   2618 		return Yal
   2619 
   2620 	case REG_AX:
   2621 		return Yax
   2622 
   2623 		/*
   2624 			case REG_SPB:
   2625 		*/
   2626 	case REG_BPB,
   2627 		REG_SIB,
   2628 		REG_DIB,
   2629 		REG_R8B,
   2630 		REG_R9B,
   2631 		REG_R10B,
   2632 		REG_R11B,
   2633 		REG_R12B,
   2634 		REG_R13B,
   2635 		REG_R14B,
   2636 		REG_R15B:
   2637 		if ctxt.Arch.Family == sys.I386 {
   2638 			return Yxxx
   2639 		}
   2640 		fallthrough
   2641 
   2642 	case REG_DL,
   2643 		REG_BL,
   2644 		REG_AH,
   2645 		REG_CH,
   2646 		REG_DH,
   2647 		REG_BH:
   2648 		return Yrb
   2649 
   2650 	case REG_CL:
   2651 		return Ycl
   2652 
   2653 	case REG_CX:
   2654 		return Ycx
   2655 
   2656 	case REG_DX, REG_BX:
   2657 		return Yrx
   2658 
   2659 	case REG_R8, /* not really Yrl */
   2660 		REG_R9,
   2661 		REG_R10,
   2662 		REG_R11,
   2663 		REG_R12,
   2664 		REG_R13,
   2665 		REG_R14,
   2666 		REG_R15:
   2667 		if ctxt.Arch.Family == sys.I386 {
   2668 			return Yxxx
   2669 		}
   2670 		fallthrough
   2671 
   2672 	case REG_SP, REG_BP, REG_SI, REG_DI:
   2673 		if ctxt.Arch.Family == sys.I386 {
   2674 			return Yrl32
   2675 		}
   2676 		return Yrl
   2677 
   2678 	case REG_F0 + 0:
   2679 		return Yf0
   2680 
   2681 	case REG_F0 + 1,
   2682 		REG_F0 + 2,
   2683 		REG_F0 + 3,
   2684 		REG_F0 + 4,
   2685 		REG_F0 + 5,
   2686 		REG_F0 + 6,
   2687 		REG_F0 + 7:
   2688 		return Yrf
   2689 
   2690 	case REG_M0 + 0,
   2691 		REG_M0 + 1,
   2692 		REG_M0 + 2,
   2693 		REG_M0 + 3,
   2694 		REG_M0 + 4,
   2695 		REG_M0 + 5,
   2696 		REG_M0 + 6,
   2697 		REG_M0 + 7:
   2698 		return Ymr
   2699 
   2700 	case REG_X0 + 0,
   2701 		REG_X0 + 1,
   2702 		REG_X0 + 2,
   2703 		REG_X0 + 3,
   2704 		REG_X0 + 4,
   2705 		REG_X0 + 5,
   2706 		REG_X0 + 6,
   2707 		REG_X0 + 7,
   2708 		REG_X0 + 8,
   2709 		REG_X0 + 9,
   2710 		REG_X0 + 10,
   2711 		REG_X0 + 11,
   2712 		REG_X0 + 12,
   2713 		REG_X0 + 13,
   2714 		REG_X0 + 14,
   2715 		REG_X0 + 15:
   2716 		return Yxr
   2717 
   2718 	case REG_Y0 + 0,
   2719 		REG_Y0 + 1,
   2720 		REG_Y0 + 2,
   2721 		REG_Y0 + 3,
   2722 		REG_Y0 + 4,
   2723 		REG_Y0 + 5,
   2724 		REG_Y0 + 6,
   2725 		REG_Y0 + 7,
   2726 		REG_Y0 + 8,
   2727 		REG_Y0 + 9,
   2728 		REG_Y0 + 10,
   2729 		REG_Y0 + 11,
   2730 		REG_Y0 + 12,
   2731 		REG_Y0 + 13,
   2732 		REG_Y0 + 14,
   2733 		REG_Y0 + 15:
   2734 		return Yyr
   2735 
   2736 	case REG_CS:
   2737 		return Ycs
   2738 	case REG_SS:
   2739 		return Yss
   2740 	case REG_DS:
   2741 		return Yds
   2742 	case REG_ES:
   2743 		return Yes
   2744 	case REG_FS:
   2745 		return Yfs
   2746 	case REG_GS:
   2747 		return Ygs
   2748 	case REG_TLS:
   2749 		return Ytls
   2750 
   2751 	case REG_GDTR:
   2752 		return Ygdtr
   2753 	case REG_IDTR:
   2754 		return Yidtr
   2755 	case REG_LDTR:
   2756 		return Yldtr
   2757 	case REG_MSW:
   2758 		return Ymsw
   2759 	case REG_TASK:
   2760 		return Ytask
   2761 
   2762 	case REG_CR + 0:
   2763 		return Ycr0
   2764 	case REG_CR + 1:
   2765 		return Ycr1
   2766 	case REG_CR + 2:
   2767 		return Ycr2
   2768 	case REG_CR + 3:
   2769 		return Ycr3
   2770 	case REG_CR + 4:
   2771 		return Ycr4
   2772 	case REG_CR + 5:
   2773 		return Ycr5
   2774 	case REG_CR + 6:
   2775 		return Ycr6
   2776 	case REG_CR + 7:
   2777 		return Ycr7
   2778 	case REG_CR + 8:
   2779 		return Ycr8
   2780 
   2781 	case REG_DR + 0:
   2782 		return Ydr0
   2783 	case REG_DR + 1:
   2784 		return Ydr1
   2785 	case REG_DR + 2:
   2786 		return Ydr2
   2787 	case REG_DR + 3:
   2788 		return Ydr3
   2789 	case REG_DR + 4:
   2790 		return Ydr4
   2791 	case REG_DR + 5:
   2792 		return Ydr5
   2793 	case REG_DR + 6:
   2794 		return Ydr6
   2795 	case REG_DR + 7:
   2796 		return Ydr7
   2797 
   2798 	case REG_TR + 0:
   2799 		return Ytr0
   2800 	case REG_TR + 1:
   2801 		return Ytr1
   2802 	case REG_TR + 2:
   2803 		return Ytr2
   2804 	case REG_TR + 3:
   2805 		return Ytr3
   2806 	case REG_TR + 4:
   2807 		return Ytr4
   2808 	case REG_TR + 5:
   2809 		return Ytr5
   2810 	case REG_TR + 6:
   2811 		return Ytr6
   2812 	case REG_TR + 7:
   2813 		return Ytr7
   2814 	}
   2815 
   2816 	return Yxxx
   2817 }
   2818 
   2819 // AsmBuf is a simple buffer to assemble variable-length x86 instructions into
   2820 // and hold assembly state.
   2821 type AsmBuf struct {
   2822 	buf     [100]byte
   2823 	off     int
   2824 	rexflag int
   2825 	vexflag int
   2826 	rep     int
   2827 	repn    int
   2828 	lock    bool
   2829 }
   2830 
   2831 // Put1 appends one byte to the end of the buffer.
   2832 func (a *AsmBuf) Put1(x byte) {
   2833 	a.buf[a.off] = x
   2834 	a.off++
   2835 }
   2836 
   2837 // Put2 appends two bytes to the end of the buffer.
   2838 func (a *AsmBuf) Put2(x, y byte) {
   2839 	a.buf[a.off+0] = x
   2840 	a.buf[a.off+1] = y
   2841 	a.off += 2
   2842 }
   2843 
   2844 // Put3 appends three bytes to the end of the buffer.
   2845 func (a *AsmBuf) Put3(x, y, z byte) {
   2846 	a.buf[a.off+0] = x
   2847 	a.buf[a.off+1] = y
   2848 	a.buf[a.off+2] = z
   2849 	a.off += 3
   2850 }
   2851 
   2852 // Put4 appends four bytes to the end of the buffer.
   2853 func (a *AsmBuf) Put4(x, y, z, w byte) {
   2854 	a.buf[a.off+0] = x
   2855 	a.buf[a.off+1] = y
   2856 	a.buf[a.off+2] = z
   2857 	a.buf[a.off+3] = w
   2858 	a.off += 4
   2859 }
   2860 
   2861 // PutInt16 writes v into the buffer using little-endian encoding.
   2862 func (a *AsmBuf) PutInt16(v int16) {
   2863 	a.buf[a.off+0] = byte(v)
   2864 	a.buf[a.off+1] = byte(v >> 8)
   2865 	a.off += 2
   2866 }
   2867 
   2868 // PutInt32 writes v into the buffer using little-endian encoding.
   2869 func (a *AsmBuf) PutInt32(v int32) {
   2870 	a.buf[a.off+0] = byte(v)
   2871 	a.buf[a.off+1] = byte(v >> 8)
   2872 	a.buf[a.off+2] = byte(v >> 16)
   2873 	a.buf[a.off+3] = byte(v >> 24)
   2874 	a.off += 4
   2875 }
   2876 
   2877 // PutInt64 writes v into the buffer using little-endian encoding.
   2878 func (a *AsmBuf) PutInt64(v int64) {
   2879 	a.buf[a.off+0] = byte(v)
   2880 	a.buf[a.off+1] = byte(v >> 8)
   2881 	a.buf[a.off+2] = byte(v >> 16)
   2882 	a.buf[a.off+3] = byte(v >> 24)
   2883 	a.buf[a.off+4] = byte(v >> 32)
   2884 	a.buf[a.off+5] = byte(v >> 40)
   2885 	a.buf[a.off+6] = byte(v >> 48)
   2886 	a.buf[a.off+7] = byte(v >> 56)
   2887 	a.off += 8
   2888 }
   2889 
   2890 // Put copies b into the buffer.
   2891 func (a *AsmBuf) Put(b []byte) {
   2892 	copy(a.buf[a.off:], b)
   2893 	a.off += len(b)
   2894 }
   2895 
   2896 // Insert inserts b at offset i.
   2897 func (a *AsmBuf) Insert(i int, b byte) {
   2898 	a.off++
   2899 	copy(a.buf[i+1:a.off], a.buf[i:a.off-1])
   2900 	a.buf[i] = b
   2901 }
   2902 
   2903 // Last returns the byte at the end of the buffer.
   2904 func (a *AsmBuf) Last() byte { return a.buf[a.off-1] }
   2905 
   2906 // Len returns the length of the buffer.
   2907 func (a *AsmBuf) Len() int { return a.off }
   2908 
   2909 // Bytes returns the contents of the buffer.
   2910 func (a *AsmBuf) Bytes() []byte { return a.buf[:a.off] }
   2911 
   2912 // Reset empties the buffer.
   2913 func (a *AsmBuf) Reset() { a.off = 0 }
   2914 
   2915 // At returns the byte at offset i.
   2916 func (a *AsmBuf) At(i int) byte { return a.buf[i] }
   2917 
   2918 // asmidx emits SIB byte.
   2919 func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
   2920 	var i int
   2921 
   2922 	// X/Y index register is used in VSIB.
   2923 	switch index {
   2924 	default:
   2925 		goto bad
   2926 
   2927 	case REG_NONE:
   2928 		i = 4 << 3
   2929 		goto bas
   2930 
   2931 	case REG_R8,
   2932 		REG_R9,
   2933 		REG_R10,
   2934 		REG_R11,
   2935 		REG_R12,
   2936 		REG_R13,
   2937 		REG_R14,
   2938 		REG_R15,
   2939 		REG_X8,
   2940 		REG_X9,
   2941 		REG_X10,
   2942 		REG_X11,
   2943 		REG_X12,
   2944 		REG_X13,
   2945 		REG_X14,
   2946 		REG_X15,
   2947 		REG_Y8,
   2948 		REG_Y9,
   2949 		REG_Y10,
   2950 		REG_Y11,
   2951 		REG_Y12,
   2952 		REG_Y13,
   2953 		REG_Y14,
   2954 		REG_Y15:
   2955 		if ctxt.Arch.Family == sys.I386 {
   2956 			goto bad
   2957 		}
   2958 		fallthrough
   2959 
   2960 	case REG_AX,
   2961 		REG_CX,
   2962 		REG_DX,
   2963 		REG_BX,
   2964 		REG_BP,
   2965 		REG_SI,
   2966 		REG_DI,
   2967 		REG_X0,
   2968 		REG_X1,
   2969 		REG_X2,
   2970 		REG_X3,
   2971 		REG_X4,
   2972 		REG_X5,
   2973 		REG_X6,
   2974 		REG_X7,
   2975 		REG_Y0,
   2976 		REG_Y1,
   2977 		REG_Y2,
   2978 		REG_Y3,
   2979 		REG_Y4,
   2980 		REG_Y5,
   2981 		REG_Y6,
   2982 		REG_Y7:
   2983 		i = reg[index] << 3
   2984 	}
   2985 
   2986 	switch scale {
   2987 	default:
   2988 		goto bad
   2989 
   2990 	case 1:
   2991 		break
   2992 
   2993 	case 2:
   2994 		i |= 1 << 6
   2995 
   2996 	case 4:
   2997 		i |= 2 << 6
   2998 
   2999 	case 8:
   3000 		i |= 3 << 6
   3001 	}
   3002 
   3003 bas:
   3004 	switch base {
   3005 	default:
   3006 		goto bad
   3007 
   3008 	case REG_NONE: /* must be mod=00 */
   3009 		i |= 5
   3010 
   3011 	case REG_R8,
   3012 		REG_R9,
   3013 		REG_R10,
   3014 		REG_R11,
   3015 		REG_R12,
   3016 		REG_R13,
   3017 		REG_R14,
   3018 		REG_R15:
   3019 		if ctxt.Arch.Family == sys.I386 {
   3020 			goto bad
   3021 		}
   3022 		fallthrough
   3023 
   3024 	case REG_AX,
   3025 		REG_CX,
   3026 		REG_DX,
   3027 		REG_BX,
   3028 		REG_SP,
   3029 		REG_BP,
   3030 		REG_SI,
   3031 		REG_DI:
   3032 		i |= reg[base]
   3033 	}
   3034 
   3035 	asmbuf.Put1(byte(i))
   3036 	return
   3037 
   3038 bad:
   3039 	ctxt.Diag("asmidx: bad address %d/%d/%d", scale, index, base)
   3040 	asmbuf.Put1(0)
   3041 	return
   3042 }
   3043 
   3044 func (asmbuf *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
   3045 	var rel obj.Reloc
   3046 
   3047 	v := vaddr(ctxt, p, a, &rel)
   3048 	if rel.Siz != 0 {
   3049 		if rel.Siz != 4 {
   3050 			ctxt.Diag("bad reloc")
   3051 		}
   3052 		r := obj.Addrel(cursym)
   3053 		*r = rel
   3054 		r.Off = int32(p.Pc + int64(asmbuf.Len()))
   3055 	}
   3056 
   3057 	asmbuf.PutInt32(int32(v))
   3058 }
   3059 
   3060 func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
   3061 	if r != nil {
   3062 		*r = obj.Reloc{}
   3063 	}
   3064 
   3065 	switch a.Name {
   3066 	case obj.NAME_STATIC,
   3067 		obj.NAME_GOTREF,
   3068 		obj.NAME_EXTERN:
   3069 		s := a.Sym
   3070 		if r == nil {
   3071 			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
   3072 			log.Fatalf("reloc")
   3073 		}
   3074 
   3075 		if a.Name == obj.NAME_GOTREF {
   3076 			r.Siz = 4
   3077 			r.Type = objabi.R_GOTPCREL
   3078 		} else if useAbs(ctxt, s) {
   3079 			r.Siz = 4
   3080 			r.Type = objabi.R_ADDR
   3081 		} else {
   3082 			r.Siz = 4
   3083 			r.Type = objabi.R_PCREL
   3084 		}
   3085 
   3086 		r.Off = -1 // caller must fill in
   3087 		r.Sym = s
   3088 		r.Add = a.Offset
   3089 
   3090 		return 0
   3091 	}
   3092 
   3093 	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
   3094 		if r == nil {
   3095 			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
   3096 			log.Fatalf("reloc")
   3097 		}
   3098 
   3099 		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
   3100 			r.Type = objabi.R_TLS_LE
   3101 			r.Siz = 4
   3102 			r.Off = -1 // caller must fill in
   3103 			r.Add = a.Offset
   3104 		}
   3105 		return 0
   3106 	}
   3107 
   3108 	return a.Offset
   3109 }
   3110 
// asmandsz emits the addressing bytes for operand a of instruction p: the
// ModRM byte, an optional SIB byte (via asmidx) and an optional 8- or 32-bit
// displacement. Relocations needed by the displacement are added to cursym.
//
// r is placed in bits 3-5 of the ModRM byte. rex is masked down to the 0x40
// and Rxr bits and OR-ed into asmbuf.rexflag together with the bits required
// by the base and index registers. m64 is not referenced in this function.
//
// Operands that cannot be encoded are reported with ctxt.Diag.
func (asmbuf *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
	var base int
	var rel obj.Reloc

	rex &= 0x40 | Rxr
	switch {
	case int64(int32(a.Offset)) == a.Offset:
		// Offset fits in sign-extended 32 bits.
	case int64(uint32(a.Offset)) == a.Offset && asmbuf.rexflag&Rxw == 0:
		// Offset fits in zero-extended 32 bits in a 32-bit instruction.
		// This is allowed for assembly that wants to use 32-bit hex
		// constants, e.g. LEAL 0x99999999(AX), AX.
	default:
		ctxt.Diag("offset too large in %s", p)
	}
	// v is the displacement candidate; rel.Siz != 0 later signals that a
	// relocation must accompany it.
	v := int32(a.Offset)
	rel.Siz = 0

	switch a.Type {
	case obj.TYPE_ADDR:
		// TYPE_ADDR is never encodable here; the two checks only add a more
		// specific diagnostic before the generic one at bad.
		if a.Name == obj.NAME_NONE {
			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
		}
		if a.Index == REG_TLS {
			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
		}
		goto bad

	case obj.TYPE_REG:
		// Register operand: ModRM with mod=3 and no displacement.
		if a.Reg < REG_AL || REG_Y0+15 < a.Reg {
			goto bad
		}
		if v != 0 {
			goto bad
		}
		asmbuf.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
		asmbuf.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
		return
	}

	if a.Type != obj.TYPE_MEM {
		goto bad
	}

	// Memory operand with a real index register: ModRM rm=4 followed by a SIB byte.
	if a.Index != REG_NONE && a.Index != REG_TLS {
		// NOTE: this base shadows the function-level base declared above.
		base := int(a.Reg)
		switch a.Name {
		case obj.NAME_EXTERN,
			obj.NAME_GOTREF,
			obj.NAME_STATIC:
			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
				goto bad
			}
			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
				// The base register has already been set. It holds the PC
				// of this instruction returned by a PC-reading thunk.
				// See obj6.go:rewriteToPcrel.
			} else {
				base = REG_NONE
			}
			v = int32(vaddr(ctxt, p, a, &rel))

		case obj.NAME_AUTO,
			obj.NAME_PARAM:
			base = REG_SP
		}

		asmbuf.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
		if base == REG_NONE {
			// No base register: mod=0 with a 32-bit displacement.
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			goto putrelv
		}

		// Zero displacement can be omitted, except with BP/R13 as base.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			return
		}

		// Displacement fits in one signed byte: mod=1.
		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put1(byte(1<<6 | 4<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
			asmbuf.Put1(byte(v))
			return
		}

		// Otherwise mod=2 with a 32-bit displacement.
		asmbuf.Put1(byte(2<<6 | 4<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), int(a.Index), base)
		goto putrelv
	}

	// Memory operand without a real index register (index is REG_NONE or REG_TLS).
	base = int(a.Reg)
	switch a.Name {
	case obj.NAME_STATIC,
		obj.NAME_GOTREF,
		obj.NAME_EXTERN:
		if a.Sym == nil {
			ctxt.Diag("bad addr: %v", p)
		}
		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
			// The base register has already been set. It holds the PC
			// of this instruction returned by a PC-reading thunk.
			// See obj6.go:rewriteToPcrel.
		} else {
			base = REG_NONE
		}
		v = int32(vaddr(ctxt, p, a, &rel))

	case obj.NAME_AUTO,
		obj.NAME_PARAM:
		base = REG_SP
	}

	if base == REG_TLS {
		v = int32(vaddr(ctxt, p, a, &rel))
	}

	asmbuf.rexflag |= regrex[base]&Rxb | rex
	// No general-purpose base register: the operand is a bare 32-bit displacement.
	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
		// Symbol references not resolved via useAbs, and every non-AMD64
		// target, use the short form: mod=0, rm=5, no SIB byte.
		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
				ctxt.Diag("%v has offset against gotref", p)
			}
			asmbuf.Put1(byte(0<<6 | 5<<0 | r<<3))
			goto putrelv
		}

		// temporary
		asmbuf.Put2(
			byte(0<<6|4<<0|r<<3), // sib present
			0<<6|4<<3|5<<0,       // DS:d32
		)
		goto putrelv
	}

	// SP and R12 as base always go through asmidx for a SIB byte, with no index.
	if base == REG_SP || base == REG_R12 {
		if v == 0 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			return
		}

		if v >= -128 && v < 128 {
			asmbuf.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
			asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
			asmbuf.Put1(byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		asmbuf.asmidx(ctxt, int(a.Scale), REG_NONE, base)
		goto putrelv
	}

	// Any other general-purpose base register: plain ModRM, no SIB byte.
	if REG_AX <= base && base <= REG_R15 {
		if a.Index == REG_TLS && !ctxt.Flag_shared {
			// The displacement becomes the addend of an R_TLS_LE relocation.
			rel = obj.Reloc{}
			rel.Type = objabi.R_TLS_LE
			rel.Siz = 4
			rel.Sym = nil
			rel.Add = int64(v)
			v = 0
		}

		// Zero displacement can be omitted, except with BP/R13 as base.
		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
			asmbuf.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
			return
		}

		if v >= -128 && v < 128 && rel.Siz == 0 {
			asmbuf.Put2(byte(1<<6|reg[base]<<0|r<<3), byte(v))
			return
		}

		asmbuf.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
		goto putrelv
	}

	goto bad

	// putrelv records the pending relocation (if any) at the current output
	// position and then emits the 32-bit displacement.
putrelv:
	if rel.Siz != 0 {
		if rel.Siz != 4 {
			ctxt.Diag("bad rel")
			goto bad
		}

		// NOTE: this r is a *obj.Reloc and shadows the int parameter r.
		r := obj.Addrel(cursym)
		*r = rel
		r.Off = int32(p.Pc + int64(asmbuf.Len()))
	}

	asmbuf.PutInt32(v)
	return

bad:
	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
	return
}
   3311 
   3312 func (asmbuf *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
   3313 	asmbuf.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
   3314 }
   3315 
   3316 func (asmbuf *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
   3317 	asmbuf.asmandsz(ctxt, cursym, p, a, o, 0, 0)
   3318 }
   3319 
   3320 func bytereg(a *obj.Addr, t *uint8) {
   3321 	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
   3322 		a.Reg += REG_AL - REG_AX
   3323 		*t = 0
   3324 	}
   3325 }
   3326 
   3327 func unbytereg(a *obj.Addr, t *uint8) {
   3328 	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
   3329 		a.Reg += REG_AX - REG_AL
   3330 		*t = 0
   3331 	}
   3332 }
   3333 
   3334 const (
   3335 	E = 0xff
   3336 )
   3337 
// ymovtab lists special-case encodings, mostly moves to and from segment,
// control, debug, test and descriptor-table registers, plus segment
// push/pop, double shifts and TLS base loads.
//
// Each entry holds, in order: the instruction, three operand classes
// (presumably source, middle operand and destination; confirm against the
// Movtab declaration), a small code selecting how the entry is encoded, and
// up to four opcode bytes. The table ends with an all-zero entry.
var ymovtab = []Movtab{
	/* push */
	{APUSHL, Ycs, Ynone, Ynone, 0, [4]uint8{0x0e, E, 0, 0}},
	{APUSHL, Yss, Ynone, Ynone, 0, [4]uint8{0x16, E, 0, 0}},
	{APUSHL, Yds, Ynone, Ynone, 0, [4]uint8{0x1e, E, 0, 0}},
	{APUSHL, Yes, Ynone, Ynone, 0, [4]uint8{0x06, E, 0, 0}},
	{APUSHL, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHL, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa0, E, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, 0, [4]uint8{0x0f, 0xa8, E, 0}},
	{APUSHW, Ycs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0e, E, 0}},
	{APUSHW, Yss, Ynone, Ynone, 0, [4]uint8{Pe, 0x16, E, 0}},
	{APUSHW, Yds, Ynone, Ynone, 0, [4]uint8{Pe, 0x1e, E, 0}},
	{APUSHW, Yes, Ynone, Ynone, 0, [4]uint8{Pe, 0x06, E, 0}},
	{APUSHW, Yfs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa0, E}},
	{APUSHW, Ygs, Ynone, Ynone, 0, [4]uint8{Pe, 0x0f, 0xa8, E}},

	/* pop */
	{APOPL, Ynone, Ynone, Yds, 0, [4]uint8{0x1f, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yes, 0, [4]uint8{0x07, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yss, 0, [4]uint8{0x17, E, 0, 0}},
	{APOPL, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPL, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPQ, Ynone, Ynone, Yfs, 0, [4]uint8{0x0f, 0xa1, E, 0}},
	{APOPQ, Ynone, Ynone, Ygs, 0, [4]uint8{0x0f, 0xa9, E, 0}},
	{APOPW, Ynone, Ynone, Yds, 0, [4]uint8{Pe, 0x1f, E, 0}},
	{APOPW, Ynone, Ynone, Yes, 0, [4]uint8{Pe, 0x07, E, 0}},
	{APOPW, Ynone, Ynone, Yss, 0, [4]uint8{Pe, 0x17, E, 0}},
	{APOPW, Ynone, Ynone, Yfs, 0, [4]uint8{Pe, 0x0f, 0xa1, E}},
	{APOPW, Ynone, Ynone, Ygs, 0, [4]uint8{Pe, 0x0f, 0xa9, E}},

	/* mov seg */
	{AMOVW, Yes, Ynone, Yml, 1, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, 1, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, 1, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, 1, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, 1, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, 1, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, 2, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, 2, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, 2, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, 2, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, 2, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, 2, [4]uint8{0x8e, 5, 0, 0}},

	/* mov cr */
	{AMOVL, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yml, 3, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yml, Ynone, Ycr0, 4, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yml, Ynone, Ycr2, 4, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yml, Ynone, Ycr3, 4, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yml, Ynone, Ycr4, 4, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yml, Ynone, Ycr8, 4, [4]uint8{0x0f, 0x22, 8, 0}},

	/* mov dr */
	{AMOVL, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yml, Ynone, Ydr0, 4, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yml, Ynone, Ydr6, 4, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yml, Ynone, Ydr7, 4, [4]uint8{0x0f, 0x23, 7, 0}},

	/* mov tr */
	{AMOVL, Ytr6, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, 3, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, 4, [4]uint8{0x0f, 0x26, 6, E}},
	{AMOVL, Yml, Ynone, Ytr7, 4, [4]uint8{0x0f, 0x26, 7, E}},

	/* lgdt, sgdt, lidt, sidt */
	{AMOVL, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, 4, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, 4, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, 3, [4]uint8{0x0f, 0x01, 1, 0}},

	/* lldt, sldt */
	{AMOVW, Yml, Ynone, Yldtr, 4, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 0, 0}},

	/* lmsw, smsw */
	{AMOVW, Yml, Ynone, Ymsw, 4, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, 3, [4]uint8{0x0f, 0x01, 4, 0}},

	/* ltr, str */
	{AMOVW, Yml, Ynone, Ytask, 4, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, 3, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	Movtab{AMOVL, Yml, Ycol, 5, [4]uint8{0, 0, 0, 0}},
	Movtab{AMOVW, Yml, Ycol, 5, [4]uint8{Pe, 0, 0, 0}},
	*/

	/* double shift */
	{ASHLL, Yi8, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, 6, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, 6, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, 6, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, 6, [4]uint8{Pe, 0xac, 0xad, 0}},

	/* load TLS base */
	{AMOVL, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, 7, [4]uint8{0, 0, 0, 0}},
	// All-zero final entry (presumably the end-of-table sentinel).
	{0, 0, 0, 0, 0, [4]uint8{}},
}
   3477 
   3478 func isax(a *obj.Addr) bool {
   3479 	switch a.Reg {
   3480 	case REG_AX, REG_AL, REG_AH:
   3481 		return true
   3482 	}
   3483 
   3484 	if a.Index == REG_AX {
   3485 		return true
   3486 	}
   3487 	return false
   3488 }
   3489 
   3490 func subreg(p *obj.Prog, from int, to int) {
   3491 	if false { /* debug['Q'] */
   3492 		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
   3493 	}
   3494 
   3495 	if int(p.From.Reg) == from {
   3496 		p.From.Reg = int16(to)
   3497 		p.Ft = 0
   3498 	}
   3499 
   3500 	if int(p.To.Reg) == from {
   3501 		p.To.Reg = int16(to)
   3502 		p.Tt = 0
   3503 	}
   3504 
   3505 	if int(p.From.Index) == from {
   3506 		p.From.Index = int16(to)
   3507 		p.Ft = 0
   3508 	}
   3509 
   3510 	if int(p.To.Index) == from {
   3511 		p.To.Index = int16(to)
   3512 		p.Tt = 0
   3513 	}
   3514 
   3515 	if false { /* debug['Q'] */
   3516 		fmt.Printf("%v\n", p)
   3517 	}
   3518 }
   3519 
   3520 func (asmbuf *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
   3521 	switch op {
   3522 	case Pm, Pe, Pf2, Pf3:
   3523 		if osize != 1 {
   3524 			if op != Pm {
   3525 				asmbuf.Put1(byte(op))
   3526 			}
   3527 			asmbuf.Put1(Pm)
   3528 			z++
   3529 			op = int(o.op[z])
   3530 			break
   3531 		}
   3532 		fallthrough
   3533 
   3534 	default:
   3535 		if asmbuf.Len() == 0 || asmbuf.Last() != Pm {
   3536 			asmbuf.Put1(Pm)
   3537 		}
   3538 	}
   3539 
   3540 	asmbuf.Put1(byte(op))
   3541 	return z
   3542 }
   3543 
// bpduff1 is the pre-encoded instruction sequence that doasm emits
// immediately before the CALL of a Zcallduff instruction when frame
// pointers are enabled on AMD64. It saves BP at -16(SP) and then makes
// BP point at that saved slot.
var bpduff1 = []byte{
	0x48, 0x89, 0x6c, 0x24, 0xf0, // MOVQ BP, -16(SP)
	0x48, 0x8d, 0x6c, 0x24, 0xf0, // LEAQ -16(SP), BP
}

// bpduff2 is the pre-encoded instruction that doasm emits right after
// that CALL: it reloads BP from 0(BP), i.e. from the slot written by
// bpduff1.
var bpduff2 = []byte{
	0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
}
   3552 
   3553 // Emit VEX prefix and opcode byte.
   3554 // The three addresses are the r/m, vvvv, and reg fields.
   3555 // The reg and rm arguments appear in the same order as the
   3556 // arguments to asmand, which typically follows the call to asmvex.
   3557 // The final two arguments are the VEX prefix (see encoding above)
   3558 // and the opcode byte.
   3559 // For details about vex prefix see:
   3560 // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
   3561 func (asmbuf *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
   3562 	asmbuf.vexflag = 1
   3563 	rexR := 0
   3564 	if r != nil {
   3565 		rexR = regrex[r.Reg] & Rxr
   3566 	}
   3567 	rexB := 0
   3568 	rexX := 0
   3569 	if rm != nil {
   3570 		rexB = regrex[rm.Reg] & Rxb
   3571 		rexX = regrex[rm.Index] & Rxx
   3572 	}
   3573 	vexM := (vex >> 3) & 0xF
   3574 	vexWLP := vex & 0x87
   3575 	vexV := byte(0)
   3576 	if v != nil {
   3577 		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
   3578 	}
   3579 	vexV ^= 0xF
   3580 	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
   3581 		// Can use 2-byte encoding.
   3582 		asmbuf.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
   3583 	} else {
   3584 		// Must use 3-byte encoding.
   3585 		asmbuf.Put3(0xc4,
   3586 			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
   3587 			vexV<<3|vexWLP,
   3588 		)
   3589 	}
   3590 	asmbuf.Put1(opcode)
   3591 }
   3592 
   3593 // regIndex returns register index that fits in 4 bits.
   3594 //
   3595 // Examples:
   3596 //   REG_X15 => 15
   3597 //   REG_R9  => 9
   3598 //   REG_AX  => 0
   3599 //
   3600 func regIndex(r int16) int {
   3601 	lower3bits := reg[r]
   3602 	high4bit := regrex[r] & Rxr << 1
   3603 	return lower3bits | high4bit
   3604 }
   3605 
   3606 // avx2gatherValid returns true if p satisfies AVX2 gather constraints.
   3607 // Reports errors via ctxt.
   3608 func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
   3609 	// If any pair of the index, mask, or destination registers
   3610 	// are the same, this instruction results a #UD fault.
   3611 	index := regIndex(p.GetFrom3().Index)
   3612 	mask := regIndex(p.From.Reg)
   3613 	dest := regIndex(p.To.Reg)
   3614 	if dest == mask || dest == index || mask == index {
   3615 		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
   3616 		return false
   3617 	}
   3618 
   3619 	return true
   3620 }
   3621 
   3622 func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
   3623 	o := opindex[p.As&obj.AMask]
   3624 
   3625 	if o == nil {
   3626 		ctxt.Diag("asmins: missing op %v", p)
   3627 		return
   3628 	}
   3629 
   3630 	pre := prefixof(ctxt, p, &p.From)
   3631 	if pre != 0 {
   3632 		asmbuf.Put1(byte(pre))
   3633 	}
   3634 	pre = prefixof(ctxt, p, &p.To)
   3635 	if pre != 0 {
   3636 		asmbuf.Put1(byte(pre))
   3637 	}
   3638 
   3639 	// TODO(rsc): This special case is for SHRQ $3, AX:DX,
   3640 	// which encodes as SHRQ $32(DX*0), AX.
   3641 	// Similarly SHRQ CX, AX:DX is really SHRQ CX(DX*0), AX.
   3642 	// Change encoding generated by assemblers and compilers and remove.
   3643 	if (p.From.Type == obj.TYPE_CONST || p.From.Type == obj.TYPE_REG) && p.From.Index != REG_NONE && p.From.Scale == 0 {
   3644 		p.SetFrom3(obj.Addr{
   3645 			Type: obj.TYPE_REG,
   3646 			Reg:  p.From.Index,
   3647 		})
   3648 		p.From.Index = 0
   3649 	}
   3650 
   3651 	// TODO(rsc): This special case is for PINSRQ etc, CMPSD etc.
   3652 	// Change encoding generated by assemblers and compilers (if any) and remove.
   3653 	switch p.As {
   3654 	case AIMUL3Q, APEXTRW, APINSRW, APINSRD, APINSRQ, APSHUFHW, APSHUFL, APSHUFW, ASHUFPD, ASHUFPS, AAESKEYGENASSIST, APSHUFD, APCLMULQDQ:
   3655 		if p.From3Type() == obj.TYPE_NONE {
   3656 			p.SetFrom3(p.From)
   3657 			p.From = obj.Addr{}
   3658 			p.From.Type = obj.TYPE_CONST
   3659 			p.From.Offset = p.To.Offset
   3660 			p.To.Offset = 0
   3661 		}
   3662 	case ACMPSD, ACMPSS, ACMPPS, ACMPPD:
   3663 		if p.From3Type() == obj.TYPE_NONE {
   3664 			p.SetFrom3(p.To)
   3665 			p.To = obj.Addr{}
   3666 			p.To.Type = obj.TYPE_CONST
   3667 			p.To.Offset = p.GetFrom3().Offset
   3668 			p.GetFrom3().Offset = 0
   3669 		}
   3670 
   3671 	case AVGATHERDPD,
   3672 		AVGATHERQPD,
   3673 		AVGATHERDPS,
   3674 		AVGATHERQPS,
   3675 		AVPGATHERDD,
   3676 		AVPGATHERQD,
   3677 		AVPGATHERDQ,
   3678 		AVPGATHERQQ:
   3679 		if !avx2gatherValid(ctxt, p) {
   3680 			return
   3681 		}
   3682 	}
   3683 
   3684 	if p.Ft == 0 {
   3685 		p.Ft = uint8(oclass(ctxt, p, &p.From))
   3686 	}
   3687 	if p.Tt == 0 {
   3688 		p.Tt = uint8(oclass(ctxt, p, &p.To))
   3689 	}
   3690 
   3691 	ft := int(p.Ft) * Ymax
   3692 	var f3t int
   3693 	tt := int(p.Tt) * Ymax
   3694 
   3695 	xo := obj.Bool2int(o.op[0] == 0x0f)
   3696 	z := 0
   3697 	var a *obj.Addr
   3698 	var l int
   3699 	var op int
   3700 	var q *obj.Prog
   3701 	var r *obj.Reloc
   3702 	var rel obj.Reloc
   3703 	var v int64
   3704 
   3705 	args := make([]int, 0, 6)
   3706 	if ft != Ynone*Ymax {
   3707 		args = append(args, ft)
   3708 	}
   3709 	for i := range p.RestArgs {
   3710 		args = append(args, oclass(ctxt, p, &p.RestArgs[i])*Ymax)
   3711 	}
   3712 	if tt != Ynone*Ymax {
   3713 		args = append(args, tt)
   3714 	}
   3715 
   3716 	for _, yt := range o.ytab {
   3717 		if !yt.match(args) {
   3718 			z += int(yt.zoffset) + xo
   3719 		} else {
   3720 			switch o.prefix {
   3721 			case Px1: /* first option valid only in 32-bit mode */
   3722 				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
   3723 					z += int(yt.zoffset) + xo
   3724 					continue
   3725 				}
   3726 			case Pq: /* 16 bit escape and opcode escape */
   3727 				asmbuf.Put2(Pe, Pm)
   3728 
   3729 			case Pq3: /* 16 bit escape and opcode escape + REX.W */
   3730 				asmbuf.rexflag |= Pw
   3731 				asmbuf.Put2(Pe, Pm)
   3732 
   3733 			case Pq4: /*  66 0F 38 */
   3734 				asmbuf.Put3(0x66, 0x0F, 0x38)
   3735 
   3736 			case Pq4w: /*  66 0F 38 + REX.W */
   3737 				asmbuf.rexflag |= Pw
   3738 				asmbuf.Put3(0x66, 0x0F, 0x38)
   3739 
   3740 			case Pq5: /*  F3 0F 38 */
   3741 				asmbuf.Put3(0xF3, 0x0F, 0x38)
   3742 
   3743 			case Pq5w: /*  F3 0F 38 + REX.W */
   3744 				asmbuf.rexflag |= Pw
   3745 				asmbuf.Put3(0xF3, 0x0F, 0x38)
   3746 
   3747 			case Pf2, /* xmm opcode escape */
   3748 				Pf3:
   3749 				asmbuf.Put2(o.prefix, Pm)
   3750 
   3751 			case Pef3:
   3752 				asmbuf.Put3(Pe, Pf3, Pm)
   3753 
   3754 			case Pfw: /* xmm opcode escape + REX.W */
   3755 				asmbuf.rexflag |= Pw
   3756 				asmbuf.Put2(Pf3, Pm)
   3757 
   3758 			case Pm: /* opcode escape */
   3759 				asmbuf.Put1(Pm)
   3760 
   3761 			case Pe: /* 16 bit escape */
   3762 				asmbuf.Put1(Pe)
   3763 
   3764 			case Pw: /* 64-bit escape */
   3765 				if ctxt.Arch.Family != sys.AMD64 {
   3766 					ctxt.Diag("asmins: illegal 64: %v", p)
   3767 				}
   3768 				asmbuf.rexflag |= Pw
   3769 
   3770 			case Pw8: /* 64-bit escape if z >= 8 */
   3771 				if z >= 8 {
   3772 					if ctxt.Arch.Family != sys.AMD64 {
   3773 						ctxt.Diag("asmins: illegal 64: %v", p)
   3774 					}
   3775 					asmbuf.rexflag |= Pw
   3776 				}
   3777 
   3778 			case Pb: /* botch */
   3779 				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
   3780 					goto bad
   3781 				}
   3782 				// NOTE(rsc): This is probably safe to do always,
   3783 				// but when enabled it chooses different encodings
   3784 				// than the old cmd/internal/obj/i386 code did,
   3785 				// which breaks our "same bits out" checks.
   3786 				// In particular, CMPB AX, $0 encodes as 80 f8 00
   3787 				// in the original obj/i386, and it would encode
   3788 				// (using a valid, shorter form) as 3c 00 if we enabled
   3789 				// the call to bytereg here.
   3790 				if ctxt.Arch.Family == sys.AMD64 {
   3791 					bytereg(&p.From, &p.Ft)
   3792 					bytereg(&p.To, &p.Tt)
   3793 				}
   3794 
   3795 			case P32: /* 32 bit but illegal if 64-bit mode */
   3796 				if ctxt.Arch.Family == sys.AMD64 {
   3797 					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
   3798 				}
   3799 
   3800 			case Py: /* 64-bit only, no prefix */
   3801 				if ctxt.Arch.Family != sys.AMD64 {
   3802 					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
   3803 				}
   3804 
   3805 			case Py1: /* 64-bit only if z < 1, no prefix */
   3806 				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
   3807 					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
   3808 				}
   3809 
   3810 			case Py3: /* 64-bit only if z < 3, no prefix */
   3811 				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
   3812 					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
   3813 				}
   3814 			}
   3815 
   3816 			if z >= len(o.op) {
   3817 				log.Fatalf("asmins bad table %v", p)
   3818 			}
   3819 			op = int(o.op[z])
   3820 			// In vex case 0x0f is actually VEX_256_F2_0F_WIG
   3821 			if op == 0x0f && o.prefix != Pvex {
   3822 				asmbuf.Put1(byte(op))
   3823 				z++
   3824 				op = int(o.op[z])
   3825 			}
   3826 
   3827 			switch yt.zcase {
   3828 			default:
   3829 				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
   3830 				return
   3831 
   3832 			case Zpseudo:
   3833 				break
   3834 
   3835 			case Zlit:
   3836 				for ; ; z++ {
   3837 					op = int(o.op[z])
   3838 					if op == 0 {
   3839 						break
   3840 					}
   3841 					asmbuf.Put1(byte(op))
   3842 				}
   3843 
   3844 			case Zlitm_r:
   3845 				for ; ; z++ {
   3846 					op = int(o.op[z])
   3847 					if op == 0 {
   3848 						break
   3849 					}
   3850 					asmbuf.Put1(byte(op))
   3851 				}
   3852 				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
   3853 
   3854 			case Zmb_r:
   3855 				bytereg(&p.From, &p.Ft)
   3856 				fallthrough
   3857 
   3858 			case Zm_r:
   3859 				asmbuf.Put1(byte(op))
   3860 				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
   3861 
   3862 			case Zm2_r:
   3863 				asmbuf.Put2(byte(op), o.op[z+1])
   3864 				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
   3865 
   3866 			case Zm_r_xm:
   3867 				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
   3868 				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
   3869 
   3870 			case Zm_r_xm_nr:
   3871 				asmbuf.rexflag = 0
   3872 				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
   3873 				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
   3874 
   3875 			case Zm_r_i_xm:
   3876 				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
   3877 				asmbuf.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
   3878 				asmbuf.Put1(byte(p.To.Offset))
   3879 
   3880 			case Zibm_r, Zibr_m:
   3881 				for {
   3882 					tmp1 := z
   3883 					z++
   3884 					op = int(o.op[tmp1])
   3885 					if op == 0 {
   3886 						break
   3887 					}
   3888 					asmbuf.Put1(byte(op))
   3889 				}
   3890 				if yt.zcase == Zibr_m {
   3891 					asmbuf.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
   3892 				} else {
   3893 					asmbuf.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
   3894 				}
   3895 				asmbuf.Put1(byte(p.From.Offset))
   3896 
   3897 			case Zaut_r:
   3898 				asmbuf.Put1(0x8d) // leal
   3899 				if p.From.Type != obj.TYPE_ADDR {
   3900 					ctxt.Diag("asmins: Zaut sb type ADDR")
   3901 				}
   3902 				p.From.Type = obj.TYPE_MEM
   3903 				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
   3904 				p.From.Type = obj.TYPE_ADDR
   3905 
   3906 			case Zm_o:
   3907 				asmbuf.Put1(byte(op))
   3908 				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
   3909 
   3910 			case Zr_m:
   3911 				asmbuf.Put1(byte(op))
   3912 				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
   3913 
   3914 			case Zvex:
   3915 				asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
   3916 
   3917 			case Zvex_rm_v_r:
   3918 				asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
   3919 				asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
   3920 
   3921 			case Zvex_rm_v_ro:
   3922 				asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
   3923 				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
   3924 
   3925 			case Zvex_i_r_v:
   3926 				asmbuf.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
   3927 				regnum := byte(0x7)
   3928 				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
   3929 					regnum &= byte(p.GetFrom3().Reg - REG_X0)
   3930 				} else {
   3931 					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
   3932 				}
   3933 				asmbuf.Put1(byte(o.op[z+2]) | regnum)
   3934 				asmbuf.Put1(byte(p.From.Offset))
   3935 
   3936 			case Zvex_i_rm_v_r:
   3937 				imm, from, from3, to := unpackOps4(p)
   3938 				asmbuf.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
   3939 				asmbuf.asmand(ctxt, cursym, p, from, to)
   3940 				asmbuf.Put1(byte(imm.Offset))
   3941 
   3942 			case Zvex_i_rm_r:
   3943 				asmbuf.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
   3944 				asmbuf.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
   3945 				asmbuf.Put1(byte(p.From.Offset))
   3946 
   3947 			case Zvex_v_rm_r:
   3948 				asmbuf.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
   3949 				asmbuf.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
   3950 
   3951 			case Zvex_r_v_rm:
   3952 				asmbuf.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
   3953 				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
   3954 
   3955 			case Zvex_rm_r_vo:
   3956 				asmbuf.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
   3957 				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
   3958 
   3959 			case Zvex_i_r_rm:
   3960 				asmbuf.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
   3961 				asmbuf.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
   3962 				asmbuf.Put1(byte(p.From.Offset))
   3963 
   3964 			case Zvex_hr_rm_v_r:
   3965 				hr, from, from3, to := unpackOps4(p)
   3966 				asmbuf.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
   3967 				asmbuf.asmand(ctxt, cursym, p, from, to)
   3968 				asmbuf.Put1(byte(regIndex(hr.Reg) << 4))
   3969 
   3970 			case Zr_m_xm:
   3971 				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
   3972 				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
   3973 
   3974 			case Zr_m_xm_nr:
   3975 				asmbuf.rexflag = 0
   3976 				asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
   3977 				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)
   3978 
   3979 			case Zo_m:
   3980 				asmbuf.Put1(byte(op))
   3981 				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
   3982 
   3983 			case Zcallindreg:
   3984 				r = obj.Addrel(cursym)
   3985 				r.Off = int32(p.Pc)
   3986 				r.Type = objabi.R_CALLIND
   3987 				r.Siz = 0
   3988 				fallthrough
   3989 
   3990 			case Zo_m64:
   3991 				asmbuf.Put1(byte(op))
   3992 				asmbuf.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
   3993 
   3994 			case Zm_ibo:
   3995 				asmbuf.Put1(byte(op))
   3996 				asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
   3997 				asmbuf.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
   3998 
   3999 			case Zibo_m:
   4000 				asmbuf.Put1(byte(op))
   4001 				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
   4002 				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
   4003 
   4004 			case Zibo_m_xm:
   4005 				z = asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z)
   4006 				asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
   4007 				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
   4008 
   4009 			case Z_ib, Zib_:
   4010 				if yt.zcase == Zib_ {
   4011 					a = &p.From
   4012 				} else {
   4013 					a = &p.To
   4014 				}
   4015 				asmbuf.Put1(byte(op))
   4016 				if p.As == AXABORT {
   4017 					asmbuf.Put1(o.op[z+1])
   4018 				}
   4019 				asmbuf.Put1(byte(vaddr(ctxt, p, a, nil)))
   4020 
   4021 			case Zib_rp:
   4022 				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
   4023 				asmbuf.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
   4024 
   4025 			case Zil_rp:
   4026 				asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
   4027 				asmbuf.Put1(byte(op + reg[p.To.Reg]))
   4028 				if o.prefix == Pe {
   4029 					v = vaddr(ctxt, p, &p.From, nil)
   4030 					asmbuf.PutInt16(int16(v))
   4031 				} else {
   4032 					asmbuf.relput4(ctxt, cursym, p, &p.From)
   4033 				}
   4034 
   4035 			case Zo_iw:
   4036 				asmbuf.Put1(byte(op))
   4037 				if p.From.Type != obj.TYPE_NONE {
   4038 					v = vaddr(ctxt, p, &p.From, nil)
   4039 					asmbuf.PutInt16(int16(v))
   4040 				}
   4041 
   4042 			case Ziq_rp:
   4043 				v = vaddr(ctxt, p, &p.From, &rel)
   4044 				l = int(v >> 32)
   4045 				if l == 0 && rel.Siz != 8 {
   4046 					//p->mark |= 0100;
   4047 					//print("zero: %llux %v\n", v, p);
   4048 					asmbuf.rexflag &^= (0x40 | Rxw)
   4049 
   4050 					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
   4051 					asmbuf.Put1(byte(0xb8 + reg[p.To.Reg]))
   4052 					if rel.Type != 0 {
   4053 						r = obj.Addrel(cursym)
   4054 						*r = rel
   4055 						r.Off = int32(p.Pc + int64(asmbuf.Len()))
   4056 					}
   4057 
   4058 					asmbuf.PutInt32(int32(v))
   4059 				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { /* sign extend */
   4060 
   4061 					//p->mark |= 0100;
   4062 					//print("sign: %llux %v\n", v, p);
   4063 					asmbuf.Put1(0xc7)
   4064 					asmbuf.asmando(ctxt, cursym, p, &p.To, 0)
   4065 
   4066 					asmbuf.PutInt32(int32(v)) // need all 8
   4067 				} else {
   4068 					//print("all: %llux %v\n", v, p);
   4069 					asmbuf.rexflag |= regrex[p.To.Reg] & Rxb
   4070 					asmbuf.Put1(byte(op + reg[p.To.Reg]))
   4071 					if rel.Type != 0 {
   4072 						r = obj.Addrel(cursym)
   4073 						*r = rel
   4074 						r.Off = int32(p.Pc + int64(asmbuf.Len()))
   4075 					}
   4076 
   4077 					asmbuf.PutInt64(v)
   4078 				}
   4079 
   4080 			case Zib_rr:
   4081 				asmbuf.Put1(byte(op))
   4082 				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
   4083 				asmbuf.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
   4084 
   4085 			case Z_il, Zil_:
   4086 				if yt.zcase == Zil_ {
   4087 					a = &p.From
   4088 				} else {
   4089 					a = &p.To
   4090 				}
   4091 				asmbuf.Put1(byte(op))
   4092 				if o.prefix == Pe {
   4093 					v = vaddr(ctxt, p, a, nil)
   4094 					asmbuf.PutInt16(int16(v))
   4095 				} else {
   4096 					asmbuf.relput4(ctxt, cursym, p, a)
   4097 				}
   4098 
   4099 			case Zm_ilo, Zilo_m:
   4100 				asmbuf.Put1(byte(op))
   4101 				if yt.zcase == Zilo_m {
   4102 					a = &p.From
   4103 					asmbuf.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
   4104 				} else {
   4105 					a = &p.To
   4106 					asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
   4107 				}
   4108 
   4109 				if o.prefix == Pe {
   4110 					v = vaddr(ctxt, p, a, nil)
   4111 					asmbuf.PutInt16(int16(v))
   4112 				} else {
   4113 					asmbuf.relput4(ctxt, cursym, p, a)
   4114 				}
   4115 
   4116 			case Zil_rr:
   4117 				asmbuf.Put1(byte(op))
   4118 				asmbuf.asmand(ctxt, cursym, p, &p.To, &p.To)
   4119 				if o.prefix == Pe {
   4120 					v = vaddr(ctxt, p, &p.From, nil)
   4121 					asmbuf.PutInt16(int16(v))
   4122 				} else {
   4123 					asmbuf.relput4(ctxt, cursym, p, &p.From)
   4124 				}
   4125 
   4126 			case Z_rp:
   4127 				asmbuf.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
   4128 				asmbuf.Put1(byte(op + reg[p.To.Reg]))
   4129 
   4130 			case Zrp_:
   4131 				asmbuf.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
   4132 				asmbuf.Put1(byte(op + reg[p.From.Reg]))
   4133 
   4134 			case Zcallcon, Zjmpcon:
   4135 				if yt.zcase == Zcallcon {
   4136 					asmbuf.Put1(byte(op))
   4137 				} else {
   4138 					asmbuf.Put1(o.op[z+1])
   4139 				}
   4140 				r = obj.Addrel(cursym)
   4141 				r.Off = int32(p.Pc + int64(asmbuf.Len()))
   4142 				r.Type = objabi.R_PCREL
   4143 				r.Siz = 4
   4144 				r.Add = p.To.Offset
   4145 				asmbuf.PutInt32(0)
   4146 
   4147 			case Zcallind:
   4148 				asmbuf.Put2(byte(op), o.op[z+1])
   4149 				r = obj.Addrel(cursym)
   4150 				r.Off = int32(p.Pc + int64(asmbuf.Len()))
   4151 				if ctxt.Arch.Family == sys.AMD64 {
   4152 					r.Type = objabi.R_PCREL
   4153 				} else {
   4154 					r.Type = objabi.R_ADDR
   4155 				}
   4156 				r.Siz = 4
   4157 				r.Add = p.To.Offset
   4158 				r.Sym = p.To.Sym
   4159 				asmbuf.PutInt32(0)
   4160 
   4161 			case Zcall, Zcallduff:
   4162 				if p.To.Sym == nil {
   4163 					ctxt.Diag("call without target")
   4164 					ctxt.DiagFlush()
   4165 					log.Fatalf("bad code")
   4166 				}
   4167 
   4168 				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
   4169 					ctxt.Diag("directly calling duff when dynamically linking Go")
   4170 				}
   4171 
   4172 				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
   4173 					// Maintain BP around call, since duffcopy/duffzero can't do it
   4174 					// (the call jumps into the middle of the function).
   4175 					// This makes it possible to see call sites for duffcopy/duffzero in
   4176 					// BP-based profiling tools like Linux perf (which is the
   4177 					// whole point of obj.Framepointer_enabled).
   4178 					// MOVQ BP, -16(SP)
   4179 					// LEAQ -16(SP), BP
   4180 					asmbuf.Put(bpduff1)
   4181 				}
   4182 				asmbuf.Put1(byte(op))
   4183 				r = obj.Addrel(cursym)
   4184 				r.Off = int32(p.Pc + int64(asmbuf.Len()))
   4185 				r.Sym = p.To.Sym
   4186 				r.Add = p.To.Offset
   4187 				r.Type = objabi.R_CALL
   4188 				r.Siz = 4
   4189 				asmbuf.PutInt32(0)
   4190 
   4191 				if ctxt.Framepointer_enabled && yt.zcase == Zcallduff && ctxt.Arch.Family == sys.AMD64 {
   4192 					// Pop BP pushed above.
   4193 					// MOVQ 0(BP), BP
   4194 					asmbuf.Put(bpduff2)
   4195 				}
   4196 
   4197 			// TODO: jump across functions needs reloc
   4198 			case Zbr, Zjmp, Zloop:
   4199 				if p.As == AXBEGIN {
   4200 					asmbuf.Put1(byte(op))
   4201 				}
   4202 				if p.To.Sym != nil {
   4203 					if yt.zcase != Zjmp {
   4204 						ctxt.Diag("branch to ATEXT")
   4205 						ctxt.DiagFlush()
   4206 						log.Fatalf("bad code")
   4207 					}
   4208 
   4209 					asmbuf.Put1(o.op[z+1])
   4210 					r = obj.Addrel(cursym)
   4211 					r.Off = int32(p.Pc + int64(asmbuf.Len()))
   4212 					r.Sym = p.To.Sym
   4213 					r.Type = objabi.R_PCREL
   4214 					r.Siz = 4
   4215 					asmbuf.PutInt32(0)
   4216 					break
   4217 				}
   4218 
   4219 				// Assumes q is in this function.
   4220 				// TODO: Check in input, preserve in brchain.
   4221 
   4222 				// Fill in backward jump now.
   4223 				q = p.Pcond
   4224 
   4225 				if q == nil {
   4226 					ctxt.Diag("jmp/branch/loop without target")
   4227 					ctxt.DiagFlush()
   4228 					log.Fatalf("bad code")
   4229 				}
   4230 
   4231 				if p.Back&1 != 0 {
   4232 					v = q.Pc - (p.Pc + 2)
   4233 					if v >= -128 && p.As != AXBEGIN {
   4234 						if p.As == AJCXZL {
   4235 							asmbuf.Put1(0x67)
   4236 						}
   4237 						asmbuf.Put2(byte(op), byte(v))
   4238 					} else if yt.zcase == Zloop {
   4239 						ctxt.Diag("loop too far: %v", p)
   4240 					} else {
   4241 						v -= 5 - 2
   4242 						if p.As == AXBEGIN {
   4243 							v--
   4244 						}
   4245 						if yt.zcase == Zbr {
   4246 							asmbuf.Put1(0x0f)
   4247 							v--
   4248 						}
   4249 
   4250 						asmbuf.Put1(o.op[z+1])
   4251 						asmbuf.PutInt32(int32(v))
   4252 					}
   4253 
   4254 					break
   4255 				}
   4256 
   4257 				// Annotate target; will fill in later.
   4258 				p.Forwd = q.Rel
   4259 
   4260 				q.Rel = p
   4261 				if p.Back&2 != 0 && p.As != AXBEGIN { // short
   4262 					if p.As == AJCXZL {
   4263 						asmbuf.Put1(0x67)
   4264 					}
   4265 					asmbuf.Put2(byte(op), 0)
   4266 				} else if yt.zcase == Zloop {
   4267 					ctxt.Diag("loop too far: %v", p)
   4268 				} else {
   4269 					if yt.zcase == Zbr {
   4270 						asmbuf.Put1(0x0f)
   4271 					}
   4272 					asmbuf.Put1(o.op[z+1])
   4273 					asmbuf.PutInt32(0)
   4274 				}
   4275 
   4276 				break
   4277 
   4278 			/*
   4279 				v = q->pc - p->pc - 2;
   4280 				if((v >= -128 && v <= 127) || p->pc == -1 || q->pc == -1) {
   4281 					*ctxt->andptr++ = op;
   4282 					*ctxt->andptr++ = v;
   4283 				} else {
   4284 					v -= 5-2;
   4285 					if(yt.zcase == Zbr) {
   4286 						*ctxt->andptr++ = 0x0f;
   4287 						v--;
   4288 					}
   4289 					*ctxt->andptr++ = o->op[z+1];
   4290 					*ctxt->andptr++ = v;
   4291 					*ctxt->andptr++ = v>>8;
   4292 					*ctxt->andptr++ = v>>16;
   4293 					*ctxt->andptr++ = v>>24;
   4294 				}
   4295 			*/
   4296 
   4297 			case Zbyte:
   4298 				v = vaddr(ctxt, p, &p.From, &rel)
   4299 				if rel.Siz != 0 {
   4300 					rel.Siz = uint8(op)
   4301 					r = obj.Addrel(cursym)
   4302 					*r = rel
   4303 					r.Off = int32(p.Pc + int64(asmbuf.Len()))
   4304 				}
   4305 
   4306 				asmbuf.Put1(byte(v))
   4307 				if op > 1 {
   4308 					asmbuf.Put1(byte(v >> 8))
   4309 					if op > 2 {
   4310 						asmbuf.PutInt16(int16(v >> 16))
   4311 						if op > 4 {
   4312 							asmbuf.PutInt32(int32(v >> 32))
   4313 						}
   4314 					}
   4315 				}
   4316 			}
   4317 
   4318 			return
   4319 		}
   4320 	}
   4321 	f3t = Ynone * Ymax
   4322 	if p.GetFrom3() != nil {
   4323 		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
   4324 	}
   4325 	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
   4326 		var pp obj.Prog
   4327 		var t []byte
   4328 		if p.As == mo[0].as {
   4329 			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
   4330 				t = mo[0].op[:]
   4331 				switch mo[0].code {
   4332 				default:
   4333 					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
   4334 
   4335 				case 0: /* lit */
   4336 					for z = 0; t[z] != E; z++ {
   4337 						asmbuf.Put1(t[z])
   4338 					}
   4339 
   4340 				case 1: /* r,m */
   4341 					asmbuf.Put1(t[0])
   4342 					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[1]))
   4343 
   4344 				case 2: /* m,r */
   4345 					asmbuf.Put1(t[0])
   4346 					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[1]))
   4347 
   4348 				case 3: /* r,m - 2op */
   4349 					asmbuf.Put2(t[0], t[1])
   4350 					asmbuf.asmando(ctxt, cursym, p, &p.To, int(t[2]))
   4351 					asmbuf.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
   4352 
   4353 				case 4: /* m,r - 2op */
   4354 					asmbuf.Put2(t[0], t[1])
   4355 					asmbuf.asmando(ctxt, cursym, p, &p.From, int(t[2]))
   4356 					asmbuf.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
   4357 
   4358 				case 5: /* load full pointer, trash heap */
   4359 					if t[0] != 0 {
   4360 						asmbuf.Put1(t[0])
   4361 					}
   4362 					switch p.To.Index {
   4363 					default:
   4364 						goto bad
   4365 
   4366 					case REG_DS:
   4367 						asmbuf.Put1(0xc5)
   4368 
   4369 					case REG_SS:
   4370 						asmbuf.Put2(0x0f, 0xb2)
   4371 
   4372 					case REG_ES:
   4373 						asmbuf.Put1(0xc4)
   4374 
   4375 					case REG_FS:
   4376 						asmbuf.Put2(0x0f, 0xb4)
   4377 
   4378 					case REG_GS:
   4379 						asmbuf.Put2(0x0f, 0xb5)
   4380 					}
   4381 
   4382 					asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To)
   4383 
   4384 				case 6: /* double shift */
   4385 					if t[0] == Pw {
   4386 						if ctxt.Arch.Family != sys.AMD64 {
   4387 							ctxt.Diag("asmins: illegal 64: %v", p)
   4388 						}
   4389 						asmbuf.rexflag |= Pw
   4390 						t = t[1:]
   4391 					} else if t[0] == Pe {
   4392 						asmbuf.Put1(Pe)
   4393 						t = t[1:]
   4394 					}
   4395 
   4396 					switch p.From.Type {
   4397 					default:
   4398 						goto bad
   4399 
   4400 					case obj.TYPE_CONST:
   4401 						asmbuf.Put2(0x0f, t[0])
   4402 						asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
   4403 						asmbuf.Put1(byte(p.From.Offset))
   4404 
   4405 					case obj.TYPE_REG:
   4406 						switch p.From.Reg {
   4407 						default:
   4408 							goto bad
   4409 
   4410 						case REG_CL, REG_CX:
   4411 							asmbuf.Put2(0x0f, t[1])
   4412 							asmbuf.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
   4413 						}
   4414 					}
   4415 
   4416 				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
   4417 				// where you load the TLS base register into a register and then index off that
   4418 				// register to access the actual TLS variables. Systems that allow direct TLS access
   4419 				// are handled in prefixof above and should not be listed here.
   4420 				case 7: /* mov tls, r */
   4421 					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
   4422 						ctxt.Diag("invalid load of TLS: %v", p)
   4423 					}
   4424 
   4425 					if ctxt.Arch.Family == sys.I386 {
   4426 						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
   4427 						// where you load the TLS base register into a register and then index off that
   4428 						// register to access the actual TLS variables. Systems that allow direct TLS access
   4429 						// are handled in prefixof above and should not be listed here.
   4430 						switch ctxt.Headtype {
   4431 						default:
   4432 							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
   4433 
   4434 						case objabi.Hlinux,
   4435 							objabi.Hnacl:
   4436 							if ctxt.Flag_shared {
   4437 								// Note that this is not generating the same insns as the other cases.
   4438 								//     MOV TLS, dst
   4439 								// becomes
   4440 								//     call __x86.get_pc_thunk.dst
   4441 								//     movl (gotpc + g@gotntpoff)(dst), dst
   4442 								// which is encoded as
   4443 								//     call __x86.get_pc_thunk.dst
   4444 								//     movq 0(dst), dst
   4445 								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
   4446 								// is g, which we can't check here, but will when we assemble the second
   4447 								// instruction.
   4448 								dst := p.To.Reg
   4449 								asmbuf.Put1(0xe8)
   4450 								r = obj.Addrel(cursym)
   4451 								r.Off = int32(p.Pc + int64(asmbuf.Len()))
   4452 								r.Type = objabi.R_CALL
   4453 								r.Siz = 4
   4454 								r.Sym = ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst))))
   4455 								asmbuf.PutInt32(0)
   4456 
   4457 								asmbuf.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
   4458 								r = obj.Addrel(cursym)
   4459 								r.Off = int32(p.Pc + int64(asmbuf.Len()))
   4460 								r.Type = objabi.R_TLS_IE
   4461 								r.Siz = 4
   4462 								r.Add = 2
   4463 								asmbuf.PutInt32(0)
   4464 							} else {
   4465 								// ELF TLS base is 0(GS).
   4466 								pp.From = p.From
   4467 
   4468 								pp.From.Type = obj.TYPE_MEM
   4469 								pp.From.Reg = REG_GS
   4470 								pp.From.Offset = 0
   4471 								pp.From.Index = REG_NONE
   4472 								pp.From.Scale = 0
   4473 								asmbuf.Put2(0x65, // GS
   4474 									0x8B)
   4475 								asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
   4476 							}
   4477 						case objabi.Hplan9:
   4478 							pp.From = obj.Addr{}
   4479 							pp.From.Type = obj.TYPE_MEM
   4480 							pp.From.Name = obj.NAME_EXTERN
   4481 							pp.From.Sym = plan9privates
   4482 							pp.From.Offset = 0
   4483 							pp.From.Index = REG_NONE
   4484 							asmbuf.Put1(0x8B)
   4485 							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
   4486 
   4487 						case objabi.Hwindows:
   4488 							// Windows TLS base is always 0x14(FS).
   4489 							pp.From = p.From
   4490 
   4491 							pp.From.Type = obj.TYPE_MEM
   4492 							pp.From.Reg = REG_FS
   4493 							pp.From.Offset = 0x14
   4494 							pp.From.Index = REG_NONE
   4495 							pp.From.Scale = 0
   4496 							asmbuf.Put2(0x64, // FS
   4497 								0x8B)
   4498 							asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
   4499 						}
   4500 						break
   4501 					}
   4502 
   4503 					switch ctxt.Headtype {
   4504 					default:
   4505 						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
   4506 
   4507 					case objabi.Hlinux:
   4508 						if !ctxt.Flag_shared {
   4509 							log.Fatalf("unknown TLS base location for linux without -shared")
   4510 						}
   4511 						// Note that this is not generating the same insn as the other cases.
   4512 						//     MOV TLS, R_to
   4513 						// becomes
   4514 						//     movq g@gottpoff(%rip), R_to
   4515 						// which is encoded as
   4516 						//     movq 0(%rip), R_to
   4517 						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
   4518 						// is g, which we can't check here, but will when we assemble the second
   4519 						// instruction.
   4520 						asmbuf.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
   4521 
   4522 						asmbuf.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
   4523 						r = obj.Addrel(cursym)
   4524 						r.Off = int32(p.Pc + int64(asmbuf.Len()))
   4525 						r.Type = objabi.R_TLS_IE
   4526 						r.Siz = 4
   4527 						r.Add = -4
   4528 						asmbuf.PutInt32(0)
   4529 
   4530 					case objabi.Hplan9:
   4531 						pp.From = obj.Addr{}
   4532 						pp.From.Type = obj.TYPE_MEM
   4533 						pp.From.Name = obj.NAME_EXTERN
   4534 						pp.From.Sym = plan9privates
   4535 						pp.From.Offset = 0
   4536 						pp.From.Index = REG_NONE
   4537 						asmbuf.rexflag |= Pw
   4538 						asmbuf.Put1(0x8B)
   4539 						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
   4540 
   4541 					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
   4542 						// TLS base is 0(FS).
   4543 						pp.From = p.From
   4544 
   4545 						pp.From.Type = obj.TYPE_MEM
   4546 						pp.From.Name = obj.NAME_NONE
   4547 						pp.From.Reg = REG_NONE
   4548 						pp.From.Offset = 0
   4549 						pp.From.Index = REG_NONE
   4550 						pp.From.Scale = 0
   4551 						asmbuf.rexflag |= Pw
   4552 						asmbuf.Put2(0x64, // FS
   4553 							0x8B)
   4554 						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
   4555 
   4556 					case objabi.Hwindows:
   4557 						// Windows TLS base is always 0x28(GS).
   4558 						pp.From = p.From
   4559 
   4560 						pp.From.Type = obj.TYPE_MEM
   4561 						pp.From.Name = obj.NAME_NONE
   4562 						pp.From.Reg = REG_GS
   4563 						pp.From.Offset = 0x28
   4564 						pp.From.Index = REG_NONE
   4565 						pp.From.Scale = 0
   4566 						asmbuf.rexflag |= Pw
   4567 						asmbuf.Put2(0x65, // GS
   4568 							0x8B)
   4569 						asmbuf.asmand(ctxt, cursym, p, &pp.From, &p.To)
   4570 					}
   4571 				}
   4572 				return
   4573 			}
   4574 		}
   4575 	}
   4576 	goto bad
   4577 
   4578 bad:
   4579 	if ctxt.Arch.Family != sys.AMD64 {
   4580 		/*
   4581 		 * here, the assembly has failed.
   4582 		 * if its a byte instruction that has
   4583 		 * unaddressable registers, try to
   4584 		 * exchange registers and reissue the
   4585 		 * instruction with the operands renamed.
   4586 		 */
   4587 		pp := *p
   4588 
   4589 		unbytereg(&pp.From, &pp.Ft)
   4590 		unbytereg(&pp.To, &pp.Tt)
   4591 
   4592 		z := int(p.From.Reg)
   4593 		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
   4594 			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
   4595 			// For now, different to keep bit-for-bit compatibility.
   4596 			if ctxt.Arch.Family == sys.I386 {
   4597 				breg := byteswapreg(ctxt, &p.To)
   4598 				if breg != REG_AX {
   4599 					asmbuf.Put1(0x87) // xchg lhs,bx
   4600 					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
   4601 					subreg(&pp, z, breg)
   4602 					asmbuf.doasm(ctxt, cursym, &pp)
   4603 					asmbuf.Put1(0x87) // xchg lhs,bx
   4604 					asmbuf.asmando(ctxt, cursym, p, &p.From, reg[breg])
   4605 				} else {
   4606 					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
   4607 					subreg(&pp, z, REG_AX)
   4608 					asmbuf.doasm(ctxt, cursym, &pp)
   4609 					asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
   4610 				}
   4611 				return
   4612 			}
   4613 
   4614 			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
   4615 				// We certainly don't want to exchange
   4616 				// with AX if the op is MUL or DIV.
   4617 				asmbuf.Put1(0x87) // xchg lhs,bx
   4618 				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
   4619 				subreg(&pp, z, REG_BX)
   4620 				asmbuf.doasm(ctxt, cursym, &pp)
   4621 				asmbuf.Put1(0x87) // xchg lhs,bx
   4622 				asmbuf.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
   4623 			} else {
   4624 				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
   4625 				subreg(&pp, z, REG_AX)
   4626 				asmbuf.doasm(ctxt, cursym, &pp)
   4627 				asmbuf.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
   4628 			}
   4629 			return
   4630 		}
   4631 
   4632 		z = int(p.To.Reg)
   4633 		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
   4634 			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
   4635 			// For now, different to keep bit-for-bit compatibility.
   4636 			if ctxt.Arch.Family == sys.I386 {
   4637 				breg := byteswapreg(ctxt, &p.From)
   4638 				if breg != REG_AX {
   4639 					asmbuf.Put1(0x87) //xchg rhs,bx
   4640 					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
   4641 					subreg(&pp, z, breg)
   4642 					asmbuf.doasm(ctxt, cursym, &pp)
   4643 					asmbuf.Put1(0x87) // xchg rhs,bx
   4644 					asmbuf.asmando(ctxt, cursym, p, &p.To, reg[breg])
   4645 				} else {
   4646 					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
   4647 					subreg(&pp, z, REG_AX)
   4648 					asmbuf.doasm(ctxt, cursym, &pp)
   4649 					asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
   4650 				}
   4651 				return
   4652 			}
   4653 
   4654 			if isax(&p.From) {
   4655 				asmbuf.Put1(0x87) // xchg rhs,bx
   4656 				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
   4657 				subreg(&pp, z, REG_BX)
   4658 				asmbuf.doasm(ctxt, cursym, &pp)
   4659 				asmbuf.Put1(0x87) // xchg rhs,bx
   4660 				asmbuf.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
   4661 			} else {
   4662 				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
   4663 				subreg(&pp, z, REG_AX)
   4664 				asmbuf.doasm(ctxt, cursym, &pp)
   4665 				asmbuf.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
   4666 			}
   4667 			return
   4668 		}
   4669 	}
   4670 
   4671 	ctxt.Diag("invalid instruction: %v", p)
   4672 	//	ctxt.Diag("doasm: notfound ft=%d tt=%d %v %d %d", p.Ft, p.Tt, p, oclass(ctxt, p, &p.From), oclass(ctxt, p, &p.To))
   4673 	return
   4674 }
   4675 
   4676 // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
   4677 // which is not referenced in a.
   4678 // If a is empty, it returns BX to account for MULB-like instructions
   4679 // that might use DX and AX.
   4680 func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
   4681 	cana, canb, canc, cand := true, true, true, true
   4682 	if a.Type == obj.TYPE_NONE {
   4683 		cana, cand = false, false
   4684 	}
   4685 
   4686 	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
   4687 		switch a.Reg {
   4688 		case REG_NONE:
   4689 			cana, cand = false, false
   4690 		case REG_AX, REG_AL, REG_AH:
   4691 			cana = false
   4692 		case REG_BX, REG_BL, REG_BH:
   4693 			canb = false
   4694 		case REG_CX, REG_CL, REG_CH:
   4695 			canc = false
   4696 		case REG_DX, REG_DL, REG_DH:
   4697 			cand = false
   4698 		}
   4699 	}
   4700 
   4701 	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
   4702 		switch a.Index {
   4703 		case REG_AX:
   4704 			cana = false
   4705 		case REG_BX:
   4706 			canb = false
   4707 		case REG_CX:
   4708 			canc = false
   4709 		case REG_DX:
   4710 			cand = false
   4711 		}
   4712 	}
   4713 
   4714 	switch {
   4715 	case cana:
   4716 		return REG_AX
   4717 	case canb:
   4718 		return REG_BX
   4719 	case canc:
   4720 		return REG_CX
   4721 	case cand:
   4722 		return REG_DX
   4723 	default:
   4724 		ctxt.Diag("impossible byte register")
   4725 		ctxt.DiagFlush()
   4726 		log.Fatalf("bad code")
   4727 		return 0
   4728 	}
   4729 }
   4730 
   4731 func isbadbyte(a *obj.Addr) bool {
   4732 	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
   4733 }
   4734 
   4735 var naclret = []uint8{
   4736 	0x5e, // POPL SI
   4737 	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
   4738 	0x83,
   4739 	0xe6,
   4740 	0xe0, // ANDL $~31, SI
   4741 	0x4c,
   4742 	0x01,
   4743 	0xfe, // ADDQ R15, SI
   4744 	0xff,
   4745 	0xe6, // JMP SI
   4746 }
   4747 
   4748 var naclret8 = []uint8{
   4749 	0x5d, // POPL BP
   4750 	// 0x8b, 0x7d, 0x00, // MOVL (BP), DI - catch return to invalid address, for debugging
   4751 	0x83,
   4752 	0xe5,
   4753 	0xe0, // ANDL $~31, BP
   4754 	0xff,
   4755 	0xe5, // JMP BP
   4756 }
   4757 
   4758 var naclspfix = []uint8{0x4c, 0x01, 0xfc} // ADDQ R15, SP
   4759 
   4760 var naclbpfix = []uint8{0x4c, 0x01, 0xfd} // ADDQ R15, BP
   4761 
   4762 var naclmovs = []uint8{
   4763 	0x89,
   4764 	0xf6, // MOVL SI, SI
   4765 	0x49,
   4766 	0x8d,
   4767 	0x34,
   4768 	0x37, // LEAQ (R15)(SI*1), SI
   4769 	0x89,
   4770 	0xff, // MOVL DI, DI
   4771 	0x49,
   4772 	0x8d,
   4773 	0x3c,
   4774 	0x3f, // LEAQ (R15)(DI*1), DI
   4775 }
   4776 
   4777 var naclstos = []uint8{
   4778 	0x89,
   4779 	0xff, // MOVL DI, DI
   4780 	0x49,
   4781 	0x8d,
   4782 	0x3c,
   4783 	0x3f, // LEAQ (R15)(DI*1), DI
   4784 }
   4785 
   4786 func (asmbuf *AsmBuf) nacltrunc(ctxt *obj.Link, reg int) {
   4787 	if reg >= REG_R8 {
   4788 		asmbuf.Put1(0x45)
   4789 	}
   4790 	reg = (reg - REG_AX) & 7
   4791 	asmbuf.Put2(0x89, byte(3<<6|reg<<3|reg))
   4792 }
   4793 
   4794 func (asmbuf *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
   4795 	asmbuf.Reset()
   4796 
   4797 	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.I386 {
   4798 		switch p.As {
   4799 		case obj.ARET:
   4800 			asmbuf.Put(naclret8)
   4801 			return
   4802 
   4803 		case obj.ACALL,
   4804 			obj.AJMP:
   4805 			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
   4806 				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
   4807 			}
   4808 
   4809 		case AINT:
   4810 			asmbuf.Put1(0xf4)
   4811 			return
   4812 		}
   4813 	}
   4814 
   4815 	if ctxt.Headtype == objabi.Hnacl && ctxt.Arch.Family == sys.AMD64 {
   4816 		if p.As == AREP {
   4817 			asmbuf.rep++
   4818 			return
   4819 		}
   4820 
   4821 		if p.As == AREPN {
   4822 			asmbuf.repn++
   4823 			return
   4824 		}
   4825 
   4826 		if p.As == ALOCK {
   4827 			asmbuf.lock = true
   4828 			return
   4829 		}
   4830 
   4831 		if p.As != ALEAQ && p.As != ALEAL {
   4832 			if p.From.Index != REG_NONE && p.From.Scale > 0 {
   4833 				asmbuf.nacltrunc(ctxt, int(p.From.Index))
   4834 			}
   4835 			if p.To.Index != REG_NONE && p.To.Scale > 0 {
   4836 				asmbuf.nacltrunc(ctxt, int(p.To.Index))
   4837 			}
   4838 		}
   4839 
   4840 		switch p.As {
   4841 		case obj.ARET:
   4842 			asmbuf.Put(naclret)
   4843 			return
   4844 
   4845 		case obj.ACALL,
   4846 			obj.AJMP:
   4847 			if p.To.Type == obj.TYPE_REG && REG_AX <= p.To.Reg && p.To.Reg <= REG_DI {
   4848 				// ANDL $~31, reg
   4849 				asmbuf.Put3(0x83, byte(0xe0|(p.To.Reg-REG_AX)), 0xe0)
   4850 				// ADDQ R15, reg
   4851 				asmbuf.Put3(0x4c, 0x01, byte(0xf8|(p.To.Reg-REG_AX)))
   4852 			}
   4853 
   4854 			if p.To.Type == obj.TYPE_REG && REG_R8 <= p.To.Reg && p.To.Reg <= REG_R15 {
   4855 				// ANDL $~31, reg
   4856 				asmbuf.Put4(0x41, 0x83, byte(0xe0|(p.To.Reg-REG_R8)), 0xe0)
   4857 				// ADDQ R15, reg
   4858 				asmbuf.Put3(0x4d, 0x01, byte(0xf8|(p.To.Reg-REG_R8)))
   4859 			}
   4860 
   4861 		case AINT:
   4862 			asmbuf.Put1(0xf4)
   4863 			return
   4864 
   4865 		case ASCASB,
   4866 			ASCASW,
   4867 			ASCASL,
   4868 			ASCASQ,
   4869 			ASTOSB,
   4870 			ASTOSW,
   4871 			ASTOSL,
   4872 			ASTOSQ:
   4873 			asmbuf.Put(naclstos)
   4874 
   4875 		case AMOVSB, AMOVSW, AMOVSL, AMOVSQ:
   4876 			asmbuf.Put(naclmovs)
   4877 		}
   4878 
   4879 		if asmbuf.rep != 0 {
   4880 			asmbuf.Put1(0xf3)
   4881 			asmbuf.rep = 0
   4882 		}
   4883 
   4884 		if asmbuf.repn != 0 {
   4885 			asmbuf.Put1(0xf2)
   4886 			asmbuf.repn = 0
   4887 		}
   4888 
   4889 		if asmbuf.lock {
   4890 			asmbuf.Put1(0xf0)
   4891 			asmbuf.lock = false
   4892 		}
   4893 	}
   4894 
   4895 	asmbuf.rexflag = 0
   4896 	asmbuf.vexflag = 0
   4897 	mark := asmbuf.Len()
   4898 	asmbuf.doasm(ctxt, cursym, p)
   4899 	if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
   4900 		/*
   4901 		 * as befits the whole approach of the architecture,
   4902 		 * the rex prefix must appear before the first opcode byte
   4903 		 * (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
   4904 		 * before the 0f opcode escape!), or it might be ignored.
   4905 		 * note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
   4906 		 */
   4907 		if ctxt.Arch.Family != sys.AMD64 {
   4908 			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
   4909 		}
   4910 		n := asmbuf.Len()
   4911 		var np int
   4912 		for np = mark; np < n; np++ {
   4913 			c := asmbuf.At(np)
   4914 			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
   4915 				break
   4916 			}
   4917 		}
   4918 		asmbuf.Insert(np, byte(0x40|asmbuf.rexflag))
   4919 	}
   4920 
   4921 	n := asmbuf.Len()
   4922 	for i := len(cursym.R) - 1; i >= 0; i-- {
   4923 		r := &cursym.R[i]
   4924 		if int64(r.Off) < p.Pc {
   4925 			break
   4926 		}
   4927 		if asmbuf.rexflag != 0 && asmbuf.vexflag == 0 {
   4928 			r.Off++
   4929 		}
   4930 		if r.Type == objabi.R_PCREL {
   4931 			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
   4932 				// PC-relative addressing is relative to the end of the instruction,
   4933 				// but the relocations applied by the linker are relative to the end
   4934 				// of the relocation. Because immediate instruction
   4935 				// arguments can follow the PC-relative memory reference in the
   4936 				// instruction encoding, the two may not coincide. In this case,
   4937 				// adjust addend so that linker can keep relocating relative to the
   4938 				// end of the relocation.
   4939 				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
   4940 			} else if ctxt.Arch.Family == sys.I386 {
   4941 				// On 386 PC-relative addressing (for non-call/jmp instructions)
   4942 				// assumes that the previous instruction loaded the PC of the end
   4943 				// of that instruction into CX, so the adjustment is relative to
   4944 				// that.
   4945 				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
   4946 			}
   4947 		}
   4948 		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
   4949 			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
   4950 			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
   4951 		}
   4952 
   4953 	}
   4954 
   4955 	if ctxt.Arch.Family == sys.AMD64 && ctxt.Headtype == objabi.Hnacl && p.As != ACMPL && p.As != ACMPQ && p.To.Type == obj.TYPE_REG {
   4956 		switch p.To.Reg {
   4957 		case REG_SP:
   4958 			asmbuf.Put(naclspfix)
   4959 		case REG_BP:
   4960 			asmbuf.Put(naclbpfix)
   4961 		}
   4962 	}
   4963 }
   4964 
   4965 // Extract 4 operands from p.
   4966 func unpackOps4(p *obj.Prog) (*obj.Addr, *obj.Addr, *obj.Addr, *obj.Addr) {
   4967 	return &p.From, &p.RestArgs[0], &p.RestArgs[1], &p.To
   4968 }
   4969