Home | History | Annotate | Download | only in fipstools
      1 // Copyright (c) 2017, Google Inc.
      2 //
      3 // Permission to use, copy, modify, and/or distribute this software for any
      4 // purpose with or without fee is hereby granted, provided that the above
      5 // copyright notice and this permission notice appear in all copies.
      6 //
      7 // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
      8 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
      9 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
     10 // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     11 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
     12 // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
     13 // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
     14 
     15 //go:generate peg delocate.peg
     16 
     17 // delocate performs several transformations of textual assembly code. See
     18 // crypto/fipsmodule/FIPS.md for an overview.
     19 package main
     20 
     21 import (
     22 	"errors"
     23 	"flag"
     24 	"fmt"
     25 	"io/ioutil"
     26 	"os"
     27 	"sort"
     28 	"strconv"
     29 	"strings"
     30 )
     31 
// inputFile represents a textual assembly file.
type inputFile struct {
	// path identifies the input. NOTE(review): presumably the filesystem
	// path the file was read from; confirm where inputFile values are built.
	path string
	// index is a unique identifier given to this file. It's used for
	// mapping local symbols.
	index int
	// isArchive indicates that the input should be processed as an ar
	// file.
	isArchive bool
	// contents contains the contents of the file.
	contents string
	// ast points to the head of the syntax tree.
	ast *node32
}
     46 
// stringWriter is the minimal output sink needed by the code in this file:
// anything that offers a WriteString method.
type stringWriter interface {
	WriteString(string) (int, error)
}
     50 
// processorType selects which instruction processor is applied to the
// instructions of an input (see the switch on it in processInput).
type processorType int

// The enumeration starts at one, so the zero value of processorType matches
// neither architecture.
const (
	ppc64le processorType = iota + 1
	x86_64
)
     57 
// delocation holds the state needed during a delocation operation.
type delocation struct {
	// processor selects the architecture-specific instruction handling.
	processor processorType
	// output receives the transformed assembly text.
	output stringWriter

	// symbols is the set of symbols defined in the module.
	symbols map[string]struct{}
	// localEntrySymbols is the set of symbols with .localentry directives.
	localEntrySymbols map[string]struct{}
	// redirectors maps from out-call symbol name to the name of a
	// redirector function for that symbol. E.g. memcpy ->
	// bcm_redirector_memcpy.
	redirectors map[string]string
	// bssAccessorsNeeded maps from a BSS symbol name to the symbol that
	// should be used to reference it. E.g. P384_data_storage ->
	// P384_data_storage.
	bssAccessorsNeeded map[string]string
	// tocLoaders is a set of symbol names for which TOC helper functions
	// are required. (ppc64le only.)
	tocLoaders map[string]struct{}
	// gotExternalsNeeded is a set of symbol names for which we need
	// delta symbols: symbols that contain the offset from their location
	// to the memory in question.
	gotExternalsNeeded map[string]struct{}

	// currentInput is the file being processed; processInput sets it and
	// contents slices node text out of it.
	currentInput inputFile
}
     85 
     86 func (d *delocation) contents(node *node32) string {
     87 	return d.currentInput.contents[node.begin:node.end]
     88 }
     89 
     90 // writeNode writes out an AST node.
     91 func (d *delocation) writeNode(node *node32) {
     92 	if _, err := d.output.WriteString(d.contents(node)); err != nil {
     93 		panic(err)
     94 	}
     95 }
     96 
     97 func (d *delocation) writeCommentedNode(node *node32) {
     98 	line := d.contents(node)
     99 	if _, err := d.output.WriteString("# WAS " + strings.TrimSpace(line) + "\n"); err != nil {
    100 		panic(err)
    101 	}
    102 }
    103 
    104 func locateError(err error, with *node32, in inputFile) error {
    105 	posMap := translatePositions([]rune(in.contents), []int{int(with.begin)})
    106 	var line int
    107 	for _, pos := range posMap {
    108 		line = pos.line
    109 	}
    110 
    111 	return fmt.Errorf("error while processing %q on line %d: %q", in.contents[with.begin:with.end], line, err)
    112 }
    113 
    114 func (d *delocation) processInput(input inputFile) (err error) {
    115 	d.currentInput = input
    116 
    117 	var origStatement *node32
    118 	defer func() {
    119 		if err := recover(); err != nil {
    120 			panic(locateError(fmt.Errorf("%s", err), origStatement, input))
    121 		}
    122 	}()
    123 
    124 	for statement := input.ast.up; statement != nil; statement = statement.next {
    125 		assertNodeType(statement, ruleStatement)
    126 		origStatement = statement
    127 
    128 		node := skipWS(statement.up)
    129 		if node == nil {
    130 			d.writeNode(statement)
    131 			continue
    132 		}
    133 
    134 		switch node.pegRule {
    135 		case ruleGlobalDirective, ruleComment, ruleLocationDirective:
    136 			d.writeNode(statement)
    137 		case ruleDirective:
    138 			statement, err = d.processDirective(statement, node.up)
    139 		case ruleLabelContainingDirective:
    140 			statement, err = d.processLabelContainingDirective(statement, node.up)
    141 		case ruleLabel:
    142 			statement, err = d.processLabel(statement, node.up)
    143 		case ruleInstruction:
    144 			switch d.processor {
    145 			case x86_64:
    146 				statement, err = d.processIntelInstruction(statement, node.up)
    147 			case ppc64le:
    148 				statement, err = d.processPPCInstruction(statement, node.up)
    149 			default:
    150 				panic("unknown processor")
    151 			}
    152 		default:
    153 			panic(fmt.Sprintf("unknown top-level statement type %q", rul3s[node.pegRule]))
    154 		}
    155 
    156 		if err != nil {
    157 			return locateError(err, origStatement, input)
    158 		}
    159 	}
    160 
    161 	return nil
    162 }
    163 
    164 func (d *delocation) processDirective(statement, directive *node32) (*node32, error) {
    165 	assertNodeType(directive, ruleDirectiveName)
    166 	directiveName := d.contents(directive)
    167 
    168 	var args []string
    169 	forEachPath(directive, func(arg *node32) {
    170 		// If the argument is a quoted string, use the raw contents.
    171 		// (Note that this doesn't unescape the string, but that's not
    172 		// needed so far.
    173 		if arg.up != nil {
    174 			arg = arg.up
    175 			assertNodeType(arg, ruleQuotedArg)
    176 			if arg.up == nil {
    177 				args = append(args, "")
    178 				return
    179 			}
    180 			arg = arg.up
    181 			assertNodeType(arg, ruleQuotedText)
    182 		}
    183 		args = append(args, d.contents(arg))
    184 	}, ruleArgs, ruleArg)
    185 
    186 	switch directiveName {
    187 	case "comm", "lcomm":
    188 		if len(args) < 1 {
    189 			return nil, errors.New("comm directive has no arguments")
    190 		}
    191 		d.bssAccessorsNeeded[args[0]] = args[0]
    192 		d.writeNode(statement)
    193 
    194 	case "data":
    195 		// ASAN and some versions of MSAN are adding a .data section,
    196 		// and adding references to symbols within it to the code. We
    197 		// will have to work around this in the future.
    198 		return nil, errors.New(".data section found in module")
    199 
    200 	case "section":
    201 		section := args[0]
    202 
    203 		if section == ".data.rel.ro" {
    204 			// In a normal build, this is an indication of a
    205 			// problem but any references from the module to this
    206 			// section will result in a relocation and thus will
    207 			// break the integrity check. ASAN can generate these
    208 			// sections and so we will likely have to work around
    209 			// that in the future.
    210 			return nil, errors.New(".data.rel.ro section found in module")
    211 		}
    212 
    213 		sectionType, ok := sectionType(section)
    214 		if !ok {
    215 			// Unknown sections are permitted in order to be robust
    216 			// to different compiler modes.
    217 			d.writeNode(statement)
    218 			break
    219 		}
    220 
    221 		switch sectionType {
    222 		case ".rodata", ".text":
    223 			// Move .rodata to .text so it may be accessed without
    224 			// a relocation. GCC with -fmerge-constants will place
    225 			// strings into separate sections, so we move all
    226 			// sections named like .rodata. Also move .text.startup
    227 			// so the self-test function is also in the module.
    228 			d.writeCommentedNode(statement)
    229 			d.output.WriteString(".text\n")
    230 
    231 		case ".data":
    232 			// See above about .data
    233 			return nil, errors.New(".data section found in module")
    234 
    235 		case ".init_array", ".fini_array", ".ctors", ".dtors":
    236 			// init_array/ctors/dtors contains function
    237 			// pointers to constructor/destructor
    238 			// functions. These contain relocations, but
    239 			// they're in a different section anyway.
    240 			d.writeNode(statement)
    241 			break
    242 
    243 		case ".debug", ".note", ".toc":
    244 			d.writeNode(statement)
    245 			break
    246 
    247 		case ".bss":
    248 			d.writeNode(statement)
    249 			return d.handleBSS(statement)
    250 		}
    251 
    252 	default:
    253 		d.writeNode(statement)
    254 	}
    255 
    256 	return statement, nil
    257 }
    258 
    259 func (d *delocation) processLabelContainingDirective(statement, directive *node32) (*node32, error) {
    260 	// The symbols within directives need to be mapped so that local
    261 	// symbols in two different .s inputs don't collide.
    262 	changed := false
    263 	assertNodeType(directive, ruleLabelContainingDirectiveName)
    264 	name := d.contents(directive)
    265 
    266 	node := directive.next
    267 	assertNodeType(node, ruleWS)
    268 
    269 	node = node.next
    270 	assertNodeType(node, ruleSymbolArgs)
    271 
    272 	var args []string
    273 	for node = skipWS(node.up); node != nil; node = skipWS(node.next) {
    274 		assertNodeType(node, ruleSymbolArg)
    275 		arg := node.up
    276 		var mapped string
    277 
    278 		for term := arg; term != nil; term = term.next {
    279 			if term.pegRule != ruleLocalSymbol {
    280 				mapped += d.contents(term)
    281 				continue
    282 			}
    283 
    284 			oldSymbol := d.contents(term)
    285 			newSymbol := d.mapLocalSymbol(oldSymbol)
    286 			if newSymbol != oldSymbol {
    287 				changed = true
    288 			}
    289 
    290 			mapped += newSymbol
    291 		}
    292 
    293 		args = append(args, mapped)
    294 	}
    295 
    296 	if !changed {
    297 		d.writeNode(statement)
    298 	} else {
    299 		d.writeCommentedNode(statement)
    300 		d.output.WriteString("\t" + name + "\t" + strings.Join(args, ", ") + "\n")
    301 	}
    302 
    303 	if name == ".localentry" {
    304 		d.output.WriteString(localEntryName(args[0]) + ":\n")
    305 	}
    306 
    307 	return statement, nil
    308 }
    309 
    310 func (d *delocation) processLabel(statement, label *node32) (*node32, error) {
    311 	symbol := d.contents(label)
    312 
    313 	switch label.pegRule {
    314 	case ruleLocalLabel:
    315 		d.output.WriteString(symbol + ":\n")
    316 	case ruleLocalSymbol:
    317 		// symbols need to be mapped so that local symbols from two
    318 		// different .s inputs don't collide.
    319 		d.output.WriteString(d.mapLocalSymbol(symbol) + ":\n")
    320 	case ruleSymbolName:
    321 		d.output.WriteString(localTargetName(symbol) + ":\n")
    322 		d.writeNode(statement)
    323 	default:
    324 		return nil, fmt.Errorf("unknown label type %q", rul3s[label.pegRule])
    325 	}
    326 
    327 	return statement, nil
    328 }
    329 
    330 // instructionArgs collects all the arguments to an instruction.
    331 func instructionArgs(node *node32) (argNodes []*node32) {
    332 	for node = skipWS(node); node != nil; node = skipWS(node.next) {
    333 		assertNodeType(node, ruleInstructionArg)
    334 		argNodes = append(argNodes, node.up)
    335 	}
    336 
    337 	return argNodes
    338 }
    339 
    340 /* ppc64le
    341 
    342 [PABI]: 64-Bit ELF V2 ABI Specification. Power Architecture. March 21st,
    343         2017
    344 
    345 (Also useful is Power ISA Version 2.07 B. Note that version three of that
    346 document is /not/ good as that's POWER9 specific.)
    347 
    348 ppc64le doesn't have IP-relative addressing and does a lot to work around this.
Rather than reference a PLT and GOT directly, it has a single structure called
    350 the TOC (Table Of Contents). Within the TOC is the contents of .rodata, .data,
    351 .got, .plt, .bss, etc sections [PABI;3.3].
    352 
    353 A pointer to the TOC is maintained in r2 and the following pattern is used to
    354 load the address of an element into a register:
    355 
    356   addis <address register>, 2, foo@toc@ha
    357   addi <address register>, <address register>, foo@toc@l
    358 
    359 The addis instruction shifts a signed constant left 16 bits and adds the
    360 result to its second argument, saving the result in the first argument. The
addi instruction does the same, but without shifting. Thus the @toc@ha
    362 suffix on a symbol means the top 16 bits of the TOC offset and @toc@l means
    363 the bottom 16 bits of the offset. However, note that both values are signed,
    364 thus offsets in the top half of a 64KB chunk will have an @ha value that's one
    365 greater than expected and a negative @l value.
    366 
    367 The TOC is specific to a module (basically an executable or shared object).
    368 This means that there's not a single TOC in a process and that r2 needs to
    369 change as control moves between modules. Thus functions have two entry points:
    370 the global entry point and the local entry point. Jumps from within the
    371 same module can use the local entry while jumps from other modules must use the
    372 global entry. The global entry establishes the correct value of r2 before
    373 running the function and the local entry skips that code.
    374 
    375 The global entry point for a function is defined by its label. The local entry
    376 is a power-of-two number of bytes from the global entry, set by the
    377 .localentry directive. (ppc64le instructions are always 32 bits, so an offset
    378 of 1 or 2 bytes is treated as an offset of zero.)
    379 
    380 In order to help the global entry code set r2 to point to the local TOC, r12 is
    381 set to the address of the global entry point when called [PABI;2.2.1.1]. Thus
    382 the global entry will typically use an addis+addi pair to add a known offset to
    383 r12 and store it in r2. For example:
    384 
    385 foo:
    386   addis 2, 12, .TOC. - foo@ha
    387   addi  2, 2,  .TOC. - foo@l
    388 
    389 (It's worth noting that the '@' operator binds very loosely, so the 3rd
    390 arguments parse as (.TOC. - foo)@ha and (.TOC. - foo)@l.)
    391 
    392 When calling a function, the compiler doesn't know whether that function is in
    393 the same module or not. Thus it doesn't know whether r12 needs to be set nor
    394 whether r2 will be clobbered on return. Rather than always assume the worst,
    395 the linker fixes stuff up once it knows that a call is going out of module:
    396 
    397 Firstly, calling, say, memcpy (which we assume to be in a different module)
    398 won't actually jump directly to memcpy, or even a PLT resolution function.
    399 It'll call a synthesised function that:
    400   a) saves r2 in the caller's stack frame
    401   b) loads the address of memcpy@PLT into r12
    402   c) jumps to r12.
    403 
    404 As this synthesised function loads memcpy@PLT, a call to memcpy from the
    405 compiled code just references memcpy directly, not memcpy@PLT.
    406 
    407 Since it jumps directly to memcpy@PLT, it can't restore r2 on return. Thus
    408 calls must be followed by a nop. If the call ends up going out-of-module, the
    409 linker will rewrite that nop to load r2 from the stack.
    410 
    411 Speaking of the stack, the stack pointer is kept in r1 and there's a 288-byte
    412 red-zone. The format of the stack frame is defined [PABI;2.2.2] and must be
    413 followed as called functions will write into their parent's stack frame. For
    414 example, the synthesised out-of-module trampolines will save r2 24 bytes into
    415 the caller's frame and all non-leaf functions save the return address 16 bytes
    416 into the caller's frame.
    417 
    418 A final point worth noting: some RISC ISAs have r0 wired to zero: all reads
    419 result in zero and all writes are discarded. POWER does something a little like
    420 that, but r0 is only special in certain argument positions for certain
    421 instructions. You just have to read the manual to know which they are.
    422 
    423 
    424 Delocation is easier than Intel because there's just TOC references, but it's
    425 also harder because there's no IP-relative addressing.
    426 
    427 Jumps are IP-relative however, and have a 24-bit immediate value. So we can
    428 jump to functions that set a register to the needed value. (r3 is the
    429 return-value register and so that's what is generally used here.) */
    430 
    431 // isPPC64LEAPair recognises an addis+addi pair that's adding the offset of
    432 // source to relative and writing the result to target.
    433 func (d *delocation) isPPC64LEAPair(statement *node32) (target, source, relative string, ok bool) {
    434 	instruction := skipWS(statement.up).up
    435 	assertNodeType(instruction, ruleInstructionName)
    436 	name1 := d.contents(instruction)
    437 	args1 := instructionArgs(instruction.next)
    438 
    439 	statement = statement.next
    440 	instruction = skipWS(statement.up).up
    441 	assertNodeType(instruction, ruleInstructionName)
    442 	name2 := d.contents(instruction)
    443 	args2 := instructionArgs(instruction.next)
    444 
    445 	if name1 != "addis" ||
    446 		len(args1) != 3 ||
    447 		name2 != "addi" ||
    448 		len(args2) != 3 {
    449 		return "", "", "", false
    450 	}
    451 
    452 	target = d.contents(args1[0])
    453 	relative = d.contents(args1[1])
    454 	source1 := d.contents(args1[2])
    455 	source2 := d.contents(args2[2])
    456 
    457 	if !strings.HasSuffix(source1, "@ha") ||
    458 		!strings.HasSuffix(source2, "@l") ||
    459 		source1[:len(source1)-3] != source2[:len(source2)-2] ||
    460 		d.contents(args2[0]) != target ||
    461 		d.contents(args2[1]) != target {
    462 		return "", "", "", false
    463 	}
    464 
    465 	source = source1[:len(source1)-3]
    466 	ok = true
    467 	return
    468 }
    469 
    470 // establishTOC writes the global entry prelude for a function. The standard
    471 // prelude involves relocations so this version moves the relocation outside
    472 // the integrity-checked area.
    473 func establishTOC(w stringWriter) {
    474 	w.WriteString("999:\n")
    475 	w.WriteString("\taddis 2, 12, .LBORINGSSL_external_toc-999b@ha\n")
    476 	w.WriteString("\taddi 2, 2, .LBORINGSSL_external_toc-999b@l\n")
    477 	w.WriteString("\tld 12, 0(2)\n")
    478 	w.WriteString("\tadd 2, 2, 12\n")
    479 }
    480 
    481 // loadTOCFuncName returns the name of a synthesized function that sets r3 to
    482 // the value of symbol+offset.
    483 func loadTOCFuncName(symbol, offset string) string {
    484 	symbol = strings.Replace(symbol, ".", "_dot_", -1)
    485 	ret := ".Lbcm_loadtoc_" + symbol
    486 	if len(offset) != 0 {
    487 		offset = strings.Replace(offset, "+", "_plus_", -1)
    488 		offset = strings.Replace(offset, "-", "_minus_", -1)
    489 		ret += "_" + offset
    490 	}
    491 	return ret
    492 }
    493 
    494 func (d *delocation) loadFromTOC(w stringWriter, symbol, offset, dest string) wrapperFunc {
    495 	d.tocLoaders[symbol+"\x00"+offset] = struct{}{}
    496 
    497 	return func(k func()) {
    498 		w.WriteString("\taddi 1, 1, -288\n")   // Clear the red zone.
    499 		w.WriteString("\tmflr " + dest + "\n") // Stash the link register.
    500 		w.WriteString("\tstd " + dest + ", -8(1)\n")
    501 		// The TOC loader will use r3, so stash it if necessary.
    502 		if dest != "3" {
    503 			w.WriteString("\tstd 3, -16(1)\n")
    504 		}
    505 
    506 		// Because loadTOCFuncName returns a .L name, we don't need a
    507 		// nop after this call.
    508 		w.WriteString("\tbl " + loadTOCFuncName(symbol, offset) + "\n")
    509 
    510 		// Cycle registers around. We need r3 -> destReg, -8(1) ->
    511 		// lr and, optionally, -16(1) -> r3.
    512 		w.WriteString("\tstd 3, -24(1)\n")
    513 		w.WriteString("\tld 3, -8(1)\n")
    514 		w.WriteString("\tmtlr 3\n")
    515 		w.WriteString("\tld " + dest + ", -24(1)\n")
    516 		if dest != "3" {
    517 			w.WriteString("\tld 3, -16(1)\n")
    518 		}
    519 		w.WriteString("\taddi 1, 1, 288\n")
    520 
    521 		k()
    522 	}
    523 }
    524 
    525 func (d *delocation) gatherOffsets(symRef *node32, offsets string) (*node32, string) {
    526 	for symRef != nil && symRef.pegRule == ruleOffset {
    527 		offset := d.contents(symRef)
    528 		if offset[0] != '+' && offset[0] != '-' {
    529 			offset = "+" + offset
    530 		}
    531 		offsets = offsets + offset
    532 		symRef = symRef.next
    533 	}
    534 	return symRef, offsets
    535 }
    536 
    537 func (d *delocation) parseMemRef(memRef *node32) (symbol, offset, section string, didChange, symbolIsLocal bool, nextRef *node32) {
    538 	if memRef.pegRule != ruleSymbolRef {
    539 		return "", "", "", false, false, memRef
    540 	}
    541 
    542 	symRef := memRef.up
    543 	nextRef = memRef.next
    544 
    545 	// (Offset* '+')?
    546 	symRef, offset = d.gatherOffsets(symRef, offset)
    547 
    548 	// (LocalSymbol / SymbolName)
    549 	symbol = d.contents(symRef)
    550 	if symRef.pegRule == ruleLocalSymbol {
    551 		symbolIsLocal = true
    552 		mapped := d.mapLocalSymbol(symbol)
    553 		if mapped != symbol {
    554 			symbol = mapped
    555 			didChange = true
    556 		}
    557 	}
    558 	symRef = symRef.next
    559 
    560 	// Offset*
    561 	symRef, offset = d.gatherOffsets(symRef, offset)
    562 
    563 	// ('@' Section / Offset*)?
    564 	if symRef != nil {
    565 		assertNodeType(symRef, ruleSection)
    566 		section = d.contents(symRef)
    567 		symRef = symRef.next
    568 
    569 		symRef, offset = d.gatherOffsets(symRef, offset)
    570 	}
    571 
    572 	if symRef != nil {
    573 		panic(fmt.Sprintf("unexpected token in SymbolRef: %q", rul3s[symRef.pegRule]))
    574 	}
    575 
    576 	return
    577 }
    578 
    579 func (d *delocation) processPPCInstruction(statement, instruction *node32) (*node32, error) {
    580 	assertNodeType(instruction, ruleInstructionName)
    581 	instructionName := d.contents(instruction)
    582 	isBranch := instructionName[0] == 'b'
    583 
    584 	argNodes := instructionArgs(instruction.next)
    585 
    586 	var wrappers wrapperStack
    587 	var args []string
    588 	changed := false
    589 
    590 Args:
    591 	for i, arg := range argNodes {
    592 		fullArg := arg
    593 		isIndirect := false
    594 
    595 		if arg.pegRule == ruleIndirectionIndicator {
    596 			arg = arg.next
    597 			isIndirect = true
    598 		}
    599 
    600 		switch arg.pegRule {
    601 		case ruleRegisterOrConstant, ruleLocalLabelRef:
    602 			args = append(args, d.contents(fullArg))
    603 
    604 		case ruleTOCRefLow:
    605 			return nil, errors.New("Found low TOC reference outside preamble pattern")
    606 
    607 		case ruleTOCRefHigh:
    608 			target, _, relative, ok := d.isPPC64LEAPair(statement)
    609 			if !ok {
    610 				return nil, errors.New("Found high TOC reference outside preamble pattern")
    611 			}
    612 
    613 			if relative != "12" {
    614 				return nil, fmt.Errorf("preamble is relative to %q, not r12", relative)
    615 			}
    616 
    617 			if target != "2" {
    618 				return nil, fmt.Errorf("preamble is setting %q, not r2", target)
    619 			}
    620 
    621 			statement = statement.next
    622 			establishTOC(d.output)
    623 			instructionName = ""
    624 			changed = true
    625 			break Args
    626 
    627 		case ruleMemoryRef:
    628 			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
    629 			changed = didChange
    630 
    631 			if len(symbol) > 0 {
    632 				if _, localEntrySymbol := d.localEntrySymbols[symbol]; localEntrySymbol && isBranch {
    633 					symbol = localEntryName(symbol)
    634 					changed = true
    635 				} else if _, knownSymbol := d.symbols[symbol]; knownSymbol {
    636 					symbol = localTargetName(symbol)
    637 					changed = true
    638 				} else if !symbolIsLocal && !isSynthesized(symbol) && len(section) == 0 {
    639 					changed = true
    640 					d.redirectors[symbol] = redirectorName(symbol)
    641 					symbol = redirectorName(symbol)
    642 					// TODO(davidben): This should sanity-check the next
    643 					// instruction is a nop and ideally remove it.
    644 					wrappers = append(wrappers, func(k func()) {
    645 						k()
    646 						// Like the linker's PLT stubs, redirector functions
    647 						// expect callers to restore r2.
    648 						d.output.WriteString("\tld 2, 24(1)\n")
    649 					})
    650 				}
    651 			}
    652 
    653 			switch section {
    654 			case "":
    655 
    656 			case "tls":
    657 				// This section identifier just tells the
    658 				// assembler to use r13, the pointer to the
    659 				// thread-local data [PABI;3.7.3.3].
    660 
    661 			case "toc@ha":
    662 				// Delete toc@ha instructions. Per
    663 				// [PABI;3.6.3], the linker is allowed to erase
    664 				// toc@ha instructions. We take advantage of
    665 				// this by unconditionally erasing the toc@ha
    666 				// instructions and doing the full lookup when
    667 				// processing toc@l.
    668 				//
    669 				// Note that any offset here applies before @ha
    670 				// and @l. That is, 42+foo@toc@ha is
    671 				// #ha(42+foo-.TOC.), not 42+#ha(foo-.TOC.). Any
    672 				// corresponding toc@l references are required
    673 				// by the ABI to have the same offset. The
    674 				// offset will be incorporated in full when
    675 				// those are processed.
    676 				if instructionName != "addis" || len(argNodes) != 3 || i != 2 || args[1] != "2" {
    677 					return nil, errors.New("can't process toc@ha reference")
    678 				}
    679 				changed = true
    680 				instructionName = ""
    681 				break Args
    682 
    683 			case "toc@l":
    684 				// Per [PAB;3.6.3], this instruction must take
    685 				// as input a register which was the output of
    686 				// a toc@ha computation and compute the actual
    687 				// address of some symbol. The toc@ha
    688 				// computation was elided, so we ignore that
    689 				// input register and compute the address
    690 				// directly.
    691 				changed = true
    692 
    693 				// For all supported toc@l instructions, the
    694 				// destination register is the first argument.
    695 				destReg := args[0]
    696 
    697 				wrappers = append(wrappers, d.loadFromTOC(d.output, symbol, offset, destReg))
    698 				switch instructionName {
    699 				case "addi":
    700 					// The original instruction was:
    701 					//   addi destReg, tocHaReg, offset+symbol@toc@l
    702 					instructionName = ""
    703 
    704 				case "ld", "lhz", "lwz":
    705 					// The original instruction was:
    706 					//   l?? destReg, offset+symbol@toc@l(tocHaReg)
    707 					//
    708 					// We transform that into the
    709 					// equivalent dereference of destReg:
    710 					//   l?? destReg, 0(destReg)
    711 					origInstructionName := instructionName
    712 					instructionName = ""
    713 
    714 					assertNodeType(memRef, ruleBaseIndexScale)
    715 					assertNodeType(memRef.up, ruleRegisterOrConstant)
    716 					if memRef.next != nil || memRef.up.next != nil {
    717 						return nil, errors.New("expected single register in BaseIndexScale for ld argument")
    718 					}
    719 
    720 					baseReg := destReg
    721 					if baseReg == "0" {
    722 						// Register zero is special as the base register for a load.
    723 						// Avoid it by spilling and using r3 instead.
    724 						baseReg = "3"
    725 						wrappers = append(wrappers, func(k func()) {
    726 							d.output.WriteString("\taddi 1, 1, -288\n") // Clear the red zone.
    727 							d.output.WriteString("\tstd " + baseReg + ", -8(1)\n")
    728 							d.output.WriteString("\tmr " + baseReg + ", " + destReg + "\n")
    729 							k()
    730 							d.output.WriteString("\tld " + baseReg + ", -8(1)\n")
    731 							d.output.WriteString("\taddi 1, 1, 288\n") // Clear the red zone.
    732 						})
    733 					}
    734 
    735 					wrappers = append(wrappers, func(k func()) {
    736 						d.output.WriteString("\t" + origInstructionName + " " + destReg + ", 0(" + baseReg + ")\n")
    737 					})
    738 				default:
    739 					return nil, fmt.Errorf("can't process TOC argument to %q", instructionName)
    740 				}
    741 
    742 			default:
    743 				return nil, fmt.Errorf("Unknown section type %q", section)
    744 			}
    745 
    746 			argStr := ""
    747 			if isIndirect {
    748 				argStr += "*"
    749 			}
    750 			argStr += symbol
    751 			if len(offset) > 0 {
    752 				argStr += offset
    753 			}
    754 			if len(section) > 0 {
    755 				argStr += "@"
    756 				argStr += section
    757 			}
    758 
    759 			for ; memRef != nil; memRef = memRef.next {
    760 				argStr += d.contents(memRef)
    761 			}
    762 
    763 			args = append(args, argStr)
    764 
    765 		default:
    766 			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
    767 		}
    768 	}
    769 
    770 	if changed {
    771 		d.writeCommentedNode(statement)
    772 
    773 		var replacement string
    774 		if len(instructionName) > 0 {
    775 			replacement = "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
    776 		}
    777 
    778 		wrappers.do(func() {
    779 			d.output.WriteString(replacement)
    780 		})
    781 	} else {
    782 		d.writeNode(statement)
    783 	}
    784 
    785 	return statement, nil
    786 }
    787 
    788 /* Intel */
    789 
// instructionType is the coarse category that classifyInstruction assigns to
// an instruction.
type instructionType int

const (
	// instrPush is a push/pushq with a single argument.
	instrPush instructionType = iota
	// instrMove is a mov/movq/vmovq with two arguments.
	instrMove
	// instrJump is a call, jmp or conditional jump with a single argument.
	instrJump
	// instrConditionalMove is a cmovneq/cmoveq with two arguments.
	instrConditionalMove
	// instrOther is everything else.
	instrOther
)
    799 
    800 func classifyInstruction(instr string, args []*node32) instructionType {
    801 	switch instr {
    802 	case "push", "pushq":
    803 		if len(args) == 1 {
    804 			return instrPush
    805 		}
    806 
    807 	case "mov", "movq", "vmovq":
    808 		if len(args) == 2 {
    809 			return instrMove
    810 		}
    811 
    812 	case "cmovneq", "cmoveq":
    813 		if len(args) == 2 {
    814 			return instrConditionalMove
    815 		}
    816 
    817 	case "call", "callq", "jmp", "jo", "jno", "js", "jns", "je", "jz", "jne", "jnz", "jb", "jnae", "jc", "jnb", "jae", "jnc", "jbe", "jna", "ja", "jnbe", "jl", "jnge", "jge", "jnl", "jle", "jng", "jg", "jnle", "jp", "jpe", "jnp", "jpo":
    818 		if len(args) == 1 {
    819 			return instrJump
    820 		}
    821 	}
    822 
    823 	return instrOther
    824 }
    825 
    826 func push(w stringWriter) wrapperFunc {
    827 	return func(k func()) {
    828 		w.WriteString("\tpushq %rax\n")
    829 		k()
    830 		w.WriteString("\txchg %rax, (%rsp)\n")
    831 	}
    832 }
    833 
    834 func (d *delocation) loadFromGOT(w stringWriter, destination, symbol, section string, redzoneCleared bool) wrapperFunc {
    835 	d.gotExternalsNeeded[symbol+"@"+section] = struct{}{}
    836 
    837 	return func(k func()) {
    838 		if !redzoneCleared {
    839 			w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
    840 		}
    841 		w.WriteString("\tpushf\n")
    842 		w.WriteString(fmt.Sprintf("\tleaq %s_%s_external(%%rip), %s\n", symbol, section, destination))
    843 		w.WriteString(fmt.Sprintf("\taddq (%s), %s\n", destination, destination))
    844 		w.WriteString(fmt.Sprintf("\tmovq (%s), %s\n", destination, destination))
    845 		w.WriteString("\tpopf\n")
    846 		if !redzoneCleared {
    847 			w.WriteString("\tleaq\t128(%rsp), %rsp\n")
    848 		}
    849 	}
    850 }
    851 
    852 func saveRegister(w stringWriter) wrapperFunc {
    853 	return func(k func()) {
    854 		w.WriteString("\tleaq -128(%rsp), %rsp\n") // Clear the red zone.
    855 		w.WriteString("\tpushq %rax\n")
    856 		k()
    857 		w.WriteString("\tpopq %rax\n")
    858 		w.WriteString("\tleaq 128(%rsp), %rsp\n")
    859 	}
    860 }
    861 
    862 func moveTo(w stringWriter, target string) wrapperFunc {
    863 	return func(k func()) {
    864 		k()
    865 		w.WriteString("\tmovq %rax, " + target + "\n")
    866 	}
    867 }
    868 
    869 func isValidLEATarget(reg string) bool {
    870 	return !strings.HasPrefix(reg, "%xmm") && !strings.HasPrefix(reg, "%ymm") && !strings.HasPrefix(reg, "%zmm")
    871 }
    872 
    873 func undoConditionalMove(w stringWriter, instr string) wrapperFunc {
    874 	var invertedCondition string
    875 
    876 	switch instr {
    877 	case "cmoveq":
    878 		invertedCondition = "ne"
    879 	case "cmovneq":
    880 		invertedCondition = "e"
    881 	default:
    882 		panic(fmt.Sprintf("don't know how to handle conditional move instruction %q", instr))
    883 	}
    884 
    885 	return func(k func()) {
    886 		w.WriteString("\tj" + invertedCondition + " 999f\n")
    887 		k()
    888 		w.WriteString("999:\n")
    889 	}
    890 }
    891 
    892 func (d *delocation) isRIPRelative(node *node32) bool {
    893 	return node != nil && node.pegRule == ruleBaseIndexScale && d.contents(node) == "(%rip)"
    894 }
    895 
// processIntelInstruction rewrites a single x86-64 instruction statement.
//
// instruction must be the InstructionName node of statement. Each argument is
// examined in turn: registers, constants and local label references are kept
// verbatim, while memory references may have their symbol replaced (known
// symbols become their local target, unknown PLT targets become redirectors)
// or may cause the whole instruction to be replaced by a generated sequence
// (OPENSSL_ia32cap_P and GOTPCREL references).
//
// If nothing changed, statement is written to the output unmodified.
// Otherwise the original is written as a comment followed by the replacement,
// which is emitted through the accumulated wrappers. The function returns
// statement on success, or an error for references it cannot rewrite.
func (d *delocation) processIntelInstruction(statement, instruction *node32) (*node32, error) {
	assertNodeType(instruction, ruleInstructionName)
	instructionName := d.contents(instruction)

	argNodes := instructionArgs(instruction.next)

	// wrappers accumulates the output wrappers to run around (or instead of)
	// the rewritten instruction; args collects the textual arguments of the
	// rewritten instruction.
	var wrappers wrapperStack
	var args []string
	changed := false

Args:
	for i, arg := range argNodes {
		// fullArg keeps the node including any indirection indicator so that
		// untouched arguments can be reproduced verbatim.
		fullArg := arg
		isIndirect := false

		if arg.pegRule == ruleIndirectionIndicator {
			arg = arg.next
			isIndirect = true
		}

		switch arg.pegRule {
		case ruleRegisterOrConstant, ruleLocalLabelRef:
			args = append(args, d.contents(fullArg))

		case ruleMemoryRef:
			symbol, offset, section, didChange, symbolIsLocal, memRef := d.parseMemRef(arg.up)
			// NOTE(review): this assignment replaces, rather than ORs into,
			// any value of changed set while handling an earlier argument.
			changed = didChange

			if symbol == "OPENSSL_ia32cap_P" {
				// Only two forms are accepted: a movq via GOTPCREL, or a
				// direct (section-less) leaq.
				var ok bool
				if section == "GOTPCREL" {
					ok = instructionName == "movq"
				} else if section == "" {
					ok = instructionName == "leaq"
				}

				if !ok {
					return nil, fmt.Errorf("instruction %q referenced OPENSSL_ia32cap_P in section %q, should be a movq from GOTPCREL or a direct leaq", instructionName, section)
				}

				// The reference must be the first of exactly two arguments,
				// be exactly "(%rip)"-relative and carry no offset.
				if i != 0 || len(argNodes) != 2 || !d.isRIPRelative(memRef) || len(offset) > 0 {
					return nil, fmt.Errorf("invalid OPENSSL_ia32cap_P reference in instruction %q", instructionName)
				}

				target := argNodes[1]
				assertNodeType(target, ruleRegisterOrConstant)
				reg := d.contents(target)

				if !strings.HasPrefix(reg, "%r") {
					return nil, fmt.Errorf("tried to load OPENSSL_ia32cap_P into %q, which is not a standard register.", reg)
				}

				changed = true
				// This wrapper never calls k, so the generated sequence
				// replaces the original instruction instead of surrounding
				// it.
				wrappers = append(wrappers, func(k func()) {
					d.output.WriteString("\tleaq\t-128(%rsp), %rsp\n") // Clear the red zone.
					d.output.WriteString("\tpushfq\n")
					d.output.WriteString("\tleaq\tOPENSSL_ia32cap_addr_delta(%rip), " + reg + "\n")
					d.output.WriteString("\taddq\t(" + reg + "), " + reg + "\n")
					d.output.WriteString("\tpopfq\n")
					d.output.WriteString("\tleaq\t128(%rsp), %rsp\n")
				})

				// The remaining argument has been consumed above.
				break Args
			}

			switch section {
			case "":
				// Plain reference: point known symbols at their local
				// target.
				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				}

			case "PLT":
				if classifyInstruction(instructionName, argNodes) != instrJump {
					return nil, fmt.Errorf("Cannot rewrite PLT reference for non-jump instruction %q", instructionName)
				}

				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !symbolIsLocal && !isSynthesized(symbol) {
					// Unknown symbol via PLT is an
					// out-call from the module, e.g.
					// memcpy.
					d.redirectors[symbol+"@"+section] = redirectorName(symbol)
					symbol = redirectorName(symbol)
				}

				// The "@PLT" suffix is always dropped (section is cleared
				// below), so the instruction is rewritten in every case.
				changed = true

			case "GOTPCREL":
				if len(offset) > 0 {
					return nil, errors.New("loading from GOT with offset is unsupported")
				}
				if i != 0 {
					return nil, errors.New("GOT access must be source operand")
				}
				if !d.isRIPRelative(memRef) {
					return nil, errors.New("GOT access must be IP-relative")
				}

				// Known symbols are loaded directly via their local target;
				// other non-synthesized symbols go through loadFromGOT.
				useGOT := false
				if _, knownSymbol := d.symbols[symbol]; knownSymbol {
					symbol = localTargetName(symbol)
					changed = true
				} else if !isSynthesized(symbol) {
					useGOT = true
				}

				// Reduce the instruction to movq symbol@GOTPCREL, targetReg.
				var targetReg string
				switch classifyInstruction(instructionName, argNodes) {
				case instrPush:
					wrappers = append(wrappers, push(d.output))
					targetReg = "%rax"
				case instrConditionalMove:
					wrappers = append(wrappers, undoConditionalMove(d.output, instructionName))
					fallthrough
				case instrMove:
					assertNodeType(argNodes[1], ruleRegisterOrConstant)
					targetReg = d.contents(argNodes[1])
				default:
					return nil, fmt.Errorf("Cannot rewrite GOTPCREL reference for instruction %q", instructionName)
				}

				var redzoneCleared bool
				if !isValidLEATarget(targetReg) {
					// Sometimes the compiler will load from the GOT to an
					// XMM register, which is not a valid target of an LEA
					// instruction.
					wrappers = append(wrappers, saveRegister(d.output))
					wrappers = append(wrappers, moveTo(d.output, targetReg))
					targetReg = "%rax"
					redzoneCleared = true
				}

				// The innermost wrapper emits the actual load and never
				// calls its continuation, so the original instruction text
				// is not written.
				if useGOT {
					wrappers = append(wrappers, d.loadFromGOT(d.output, targetReg, symbol, section, redzoneCleared))
				} else {
					wrappers = append(wrappers, func(k func()) {
						d.output.WriteString(fmt.Sprintf("\tleaq\t%s(%%rip), %s\n", symbol, targetReg))
					})
				}
				changed = true
				break Args

			default:
				return nil, fmt.Errorf("Unknown section type %q", section)
			}

			// Reaching this point with a section still set and nothing
			// changed means a case above failed to handle it.
			if !changed && len(section) > 0 {
				panic("section was not handled")
			}
			section = ""

			// Rebuild the argument text: optional indirection marker,
			// (possibly rewritten) symbol, offset, then the remaining
			// memory-reference nodes verbatim.
			argStr := ""
			if isIndirect {
				argStr += "*"
			}
			argStr += symbol
			argStr += offset

			for ; memRef != nil; memRef = memRef.next {
				argStr += d.contents(memRef)
			}

			args = append(args, argStr)

		default:
			panic(fmt.Sprintf("unknown instruction argument type %q", rul3s[arg.pegRule]))
		}
	}

	if changed {
		// Keep the original statement as a comment, then emit the
		// replacement through the wrapper stack.
		d.writeCommentedNode(statement)
		replacement := "\t" + instructionName + "\t" + strings.Join(args, ", ") + "\n"
		wrappers.do(func() {
			d.output.WriteString(replacement)
		})
	} else {
		d.writeNode(statement)
	}

	return statement, nil
}
   1081 
   1082 func (d *delocation) handleBSS(statement *node32) (*node32, error) {
   1083 	lastStatement := statement
   1084 	for statement = statement.next; statement != nil; lastStatement, statement = statement, statement.next {
   1085 		node := skipWS(statement.up)
   1086 		if node == nil {
   1087 			d.writeNode(statement)
   1088 			continue
   1089 		}
   1090 
   1091 		switch node.pegRule {
   1092 		case ruleGlobalDirective, ruleComment, ruleInstruction, ruleLocationDirective:
   1093 			d.writeNode(statement)
   1094 
   1095 		case ruleDirective:
   1096 			directive := node.up
   1097 			assertNodeType(directive, ruleDirectiveName)
   1098 			directiveName := d.contents(directive)
   1099 			if directiveName == "text" || directiveName == "section" || directiveName == "data" {
   1100 				return lastStatement, nil
   1101 			}
   1102 			d.writeNode(statement)
   1103 
   1104 		case ruleLabel:
   1105 			label := node.up
   1106 			d.writeNode(statement)
   1107 
   1108 			if label.pegRule != ruleLocalSymbol {
   1109 				symbol := d.contents(label)
   1110 				localSymbol := localTargetName(symbol)
   1111 				d.output.WriteString(fmt.Sprintf("\n%s:\n", localSymbol))
   1112 
   1113 				d.bssAccessorsNeeded[symbol] = localSymbol
   1114 			}
   1115 
   1116 		case ruleLabelContainingDirective:
   1117 			var err error
   1118 			statement, err = d.processLabelContainingDirective(statement, node.up)
   1119 			if err != nil {
   1120 				return nil, err
   1121 			}
   1122 
   1123 		default:
   1124 			return nil, fmt.Errorf("unknown BSS statement type %q in %q", rul3s[node.pegRule], d.contents(statement))
   1125 		}
   1126 	}
   1127 
   1128 	return lastStatement, nil
   1129 }
   1130 
// transform delocates the already-parsed inputs and writes the resulting
// assembly to w.
//
// It first gathers every label symbol and every .localentry symbol across all
// inputs (panicking on duplicates), detects the processor from the first
// input, and then processes each input in order between the
// BORINGSSL_bcm_text_start and BORINGSSL_bcm_text_end labels. Afterwards it
// emits, in sorted order, the redirector functions, the BSS accessor
// functions, the processor-specific helpers (TOC loaders on ppc64le; GOT
// "_external" entries and the OPENSSL_ia32cap helpers otherwise) and finally
// the BORINGSSL_bcm_text_hash object.
//
// The first error returned by processInput is returned unchanged.
func transform(w stringWriter, inputs []inputFile) error {
	// symbols contains all defined symbols.
	symbols := make(map[string]struct{})
	// localEntrySymbols contains all symbols with a .localentry directive.
	localEntrySymbols := make(map[string]struct{})

	for _, input := range inputs {
		// Record the name of every label; the same name appearing twice
		// (in this or any earlier input) is fatal.
		forEachPath(input.ast.up, func(node *node32) {
			symbol := input.contents[node.begin:node.end]
			if _, ok := symbols[symbol]; ok {
				panic(fmt.Sprintf("Duplicate symbol found: %q in %q", symbol, input.path))
			}
			symbols[symbol] = struct{}{}
		}, ruleStatement, ruleLabel, ruleSymbolName)

		// Record the first argument of every .localentry directive.
		forEachPath(input.ast.up, func(node *node32) {
			node = node.up
			assertNodeType(node, ruleLabelContainingDirectiveName)
			directive := input.contents[node.begin:node.end]
			if directive != ".localentry" {
				return
			}
			// Extract the first argument.
			node = skipWS(node.next)
			assertNodeType(node, ruleSymbolArgs)
			node = node.up
			assertNodeType(node, ruleSymbolArg)
			symbol := input.contents[node.begin:node.end]
			if _, ok := localEntrySymbols[symbol]; ok {
				panic(fmt.Sprintf("Duplicate .localentry directive found: %q in %q", symbol, input.path))
			}
			localEntrySymbols[symbol] = struct{}{}
		}, ruleStatement, ruleLabelContainingDirective)
	}

	// The processor is detected from the first input only; with no inputs
	// x86_64 is assumed.
	processor := x86_64
	if len(inputs) > 0 {
		processor = detectProcessor(inputs[0])
	}

	d := &delocation{
		symbols:            symbols,
		localEntrySymbols:  localEntrySymbols,
		processor:          processor,
		output:             w,
		redirectors:        make(map[string]string),
		bssAccessorsNeeded: make(map[string]string),
		tocLoaders:         make(map[string]struct{}),
		gotExternalsNeeded: make(map[string]struct{}),
	}

	w.WriteString(".text\nBORINGSSL_bcm_text_start:\n")

	for _, input := range inputs {
		if err := d.processInput(input); err != nil {
			return err
		}
	}

	w.WriteString(".text\nBORINGSSL_bcm_text_end:\n")

	// Emit redirector functions. Each is a single jump instruction.
	// The names are sorted so that the output does not depend on map
	// iteration order.
	var redirectorNames []string
	for name := range d.redirectors {
		redirectorNames = append(redirectorNames, name)
	}
	sort.Strings(redirectorNames)

	for _, name := range redirectorNames {
		redirector := d.redirectors[name]
		if d.processor == ppc64le {
			w.WriteString(".section \".toc\", \"aw\"\n")
			w.WriteString(".Lredirector_toc_" + name + ":\n")
			w.WriteString(".quad " + name + "\n")
			w.WriteString(".text\n")
			w.WriteString(".type " + redirector + ", @function\n")
			w.WriteString(redirector + ":\n")
			// |name| will clobber r2, so save it. This is matched by a restore in
			// redirector calls.
			w.WriteString("\tstd 2, 24(1)\n")
			// Load and call |name|'s global entry point.
			w.WriteString("\taddis 12, 2, .Lredirector_toc_" + name + "@toc@ha\n")
			w.WriteString("\tld 12, .Lredirector_toc_" + name + "@toc@l(12)\n")
			w.WriteString("\tmtctr 12\n")
			w.WriteString("\tbctr\n")
		} else {
			w.WriteString(".type " + redirector + ", @function\n")
			w.WriteString(redirector + ":\n")
			w.WriteString("\tjmp\t" + name + "\n")
		}
	}

	var accessorNames []string
	for accessor := range d.bssAccessorsNeeded {
		accessorNames = append(accessorNames, accessor)
	}
	sort.Strings(accessorNames)

	// Emit BSS accessor functions. On x86-64 each is a single LEA followed
	// by RET; on ppc64le each is addis/addi followed by blr.
	for _, name := range accessorNames {
		funcName := accessorName(name)
		w.WriteString(".type " + funcName + ", @function\n")
		w.WriteString(funcName + ":\n")
		target := d.bssAccessorsNeeded[name]

		if d.processor == ppc64le {
			w.WriteString("\taddis 3, 2, " + target + "@toc@ha\n")
			w.WriteString("\taddi 3, 3, " + target + "@toc@l\n")
			w.WriteString("\tblr\n")
		} else {
			w.WriteString("\tleaq\t" + target + "(%rip), %rax\n\tret\n")
		}
	}

	if d.processor == ppc64le {
		// Emit one TOC loader function per entry in tocLoaders. Each key
		// holds a symbol and an offset separated by a NUL byte.
		loadTOCNames := sortedSet(d.tocLoaders)
		for _, symbolAndOffset := range loadTOCNames {
			parts := strings.SplitN(symbolAndOffset, "\x00", 2)
			symbol, offset := parts[0], parts[1]

			funcName := loadTOCFuncName(symbol, offset)
			ref := symbol + offset

			// The function is labelled both with and without the first two
			// characters of funcName.
			w.WriteString(".type " + funcName[2:] + ", @function\n")
			w.WriteString(funcName[2:] + ":\n")
			w.WriteString(funcName + ":\n")
			w.WriteString("\taddis 3, 2, " + ref + "@toc@ha\n")
			w.WriteString("\taddi 3, 3, " + ref + "@toc@l\n")
			w.WriteString("\tblr\n")
		}

		w.WriteString(".LBORINGSSL_external_toc:\n")
		w.WriteString(".quad .TOC.-.LBORINGSSL_external_toc\n")
	} else {
		// Emit one 8-byte "_external" object per entry in
		// gotExternalsNeeded. Each key has the form symbol@section.
		externalNames := sortedSet(d.gotExternalsNeeded)
		for _, name := range externalNames {
			parts := strings.SplitN(name, "@", 2)
			symbol, section := parts[0], parts[1]
			w.WriteString(".type " + symbol + "_" + section + "_external, @object\n")
			w.WriteString(".size " + symbol + "_" + section + "_external, 8\n")
			w.WriteString(symbol + "_" + section + "_external:\n")
			// Ideally this would be .quad foo@GOTPCREL, but clang's
			// assembler cannot emit a 64-bit GOTPCREL relocation. Instead,
			// we manually sign-extend the value, knowing that the GOT is
			// always at the end, thus foo@GOTPCREL has a positive value.
			w.WriteString("\t.long " + symbol + "@" + section + "\n")
			w.WriteString("\t.long 0\n")
		}

		// OPENSSL_ia32cap_get returns the address of OPENSSL_ia32cap_P in
		// %rax.
		w.WriteString(".type OPENSSL_ia32cap_get, @function\n")
		w.WriteString("OPENSSL_ia32cap_get:\n")
		w.WriteString("\tleaq OPENSSL_ia32cap_P(%rip), %rax\n")
		w.WriteString("\tret\n")

		// OPENSSL_ia32cap_addr_delta holds the distance from itself to
		// OPENSSL_ia32cap_P.
		w.WriteString(".extern OPENSSL_ia32cap_P\n")
		w.WriteString(".type OPENSSL_ia32cap_addr_delta, @object\n")
		w.WriteString(".size OPENSSL_ia32cap_addr_delta, 8\n")
		w.WriteString("OPENSSL_ia32cap_addr_delta:\n")
		w.WriteString(".quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta\n")
	}

	// Emit the BORINGSSL_bcm_text_hash object, one .byte directive per byte
	// of uninitHashValue.
	w.WriteString(".type BORINGSSL_bcm_text_hash, @object\n")
	w.WriteString(".size BORINGSSL_bcm_text_hash, 64\n")
	w.WriteString("BORINGSSL_bcm_text_hash:\n")
	for _, b := range uninitHashValue {
		w.WriteString(".byte 0x" + strconv.FormatUint(uint64(b), 16) + "\n")
	}

	return nil
}
   1301 
   1302 func parseInputs(inputs []inputFile) error {
   1303 	for i, input := range inputs {
   1304 		var contents string
   1305 
   1306 		if input.isArchive {
   1307 			arFile, err := os.Open(input.path)
   1308 			if err != nil {
   1309 				return err
   1310 			}
   1311 			defer arFile.Close()
   1312 
   1313 			ar, err := ParseAR(arFile)
   1314 			if err != nil {
   1315 				return err
   1316 			}
   1317 
   1318 			if len(ar) != 1 {
   1319 				return fmt.Errorf("expected one file in archive, but found %d", len(ar))
   1320 			}
   1321 
   1322 			for _, c := range ar {
   1323 				contents = string(c)
   1324 			}
   1325 		} else {
   1326 			inBytes, err := ioutil.ReadFile(input.path)
   1327 			if err != nil {
   1328 				return err
   1329 			}
   1330 
   1331 			contents = string(inBytes)
   1332 		}
   1333 
   1334 		asm := Asm{Buffer: contents, Pretty: true}
   1335 		asm.Init()
   1336 		if err := asm.Parse(); err != nil {
   1337 			return fmt.Errorf("error while parsing %q: %s", input.path, err)
   1338 		}
   1339 		ast := asm.AST()
   1340 
   1341 		inputs[i].contents = contents
   1342 		inputs[i].ast = ast
   1343 	}
   1344 
   1345 	return nil
   1346 }
   1347 
   1348 func main() {
   1349 	// The .a file, if given, is expected to be an archive of textual
   1350 	// assembly sources. That's odd, but CMake really wants to create
   1351 	// archive files so it's the only way that we can make it work.
   1352 	arInput := flag.String("a", "", "Path to a .a file containing assembly sources")
   1353 	outFile := flag.String("o", "", "Path to output assembly")
   1354 
   1355 	flag.Parse()
   1356 
   1357 	if len(*outFile) == 0 {
   1358 		fmt.Fprintf(os.Stderr, "Must give argument to -o.\n")
   1359 		os.Exit(1)
   1360 	}
   1361 
   1362 	var inputs []inputFile
   1363 	if len(*arInput) > 0 {
   1364 		inputs = append(inputs, inputFile{
   1365 			path:      *arInput,
   1366 			index:     0,
   1367 			isArchive: true,
   1368 		})
   1369 	}
   1370 
   1371 	for i, path := range flag.Args() {
   1372 		if len(path) == 0 {
   1373 			continue
   1374 		}
   1375 
   1376 		inputs = append(inputs, inputFile{
   1377 			path:  path,
   1378 			index: i + 1,
   1379 		})
   1380 	}
   1381 
   1382 	if err := parseInputs(inputs); err != nil {
   1383 		fmt.Fprintf(os.Stderr, "%s\n", err)
   1384 		os.Exit(1)
   1385 	}
   1386 
   1387 	out, err := os.OpenFile(*outFile, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
   1388 	if err != nil {
   1389 		panic(err)
   1390 	}
   1391 	defer out.Close()
   1392 
   1393 	if err := transform(out, inputs); err != nil {
   1394 		fmt.Fprintf(os.Stderr, "%s\n", err)
   1395 		os.Exit(1)
   1396 	}
   1397 }
   1398 
   1399 func forEachPath(node *node32, cb func(*node32), rules ...pegRule) {
   1400 	if node == nil {
   1401 		return
   1402 	}
   1403 
   1404 	if len(rules) == 0 {
   1405 		cb(node)
   1406 		return
   1407 	}
   1408 
   1409 	rule := rules[0]
   1410 	childRules := rules[1:]
   1411 
   1412 	for ; node != nil; node = node.next {
   1413 		if node.pegRule != rule {
   1414 			continue
   1415 		}
   1416 
   1417 		if len(childRules) == 0 {
   1418 			cb(node)
   1419 		} else {
   1420 			forEachPath(node.up, cb, childRules...)
   1421 		}
   1422 	}
   1423 }
   1424 
   1425 func skipNodes(node *node32, ruleToSkip pegRule) *node32 {
   1426 	for ; node != nil && node.pegRule == ruleToSkip; node = node.next {
   1427 	}
   1428 	return node
   1429 }
   1430 
   1431 func skipWS(node *node32) *node32 {
   1432 	return skipNodes(node, ruleWS)
   1433 }
   1434 
   1435 func assertNodeType(node *node32, expected pegRule) {
   1436 	if rule := node.pegRule; rule != expected {
   1437 		panic(fmt.Sprintf("node was %q, but wanted %q", rul3s[rule], rul3s[expected]))
   1438 	}
   1439 }
   1440 
   1441 type wrapperFunc func(func())
   1442 
   1443 type wrapperStack []wrapperFunc
   1444 
   1445 func (w *wrapperStack) do(baseCase func()) {
   1446 	if len(*w) == 0 {
   1447 		baseCase()
   1448 		return
   1449 	}
   1450 
   1451 	wrapper := (*w)[0]
   1452 	*w = (*w)[1:]
   1453 	wrapper(func() { w.do(baseCase) })
   1454 }
   1455 
   1456 // localTargetName returns the name of the local target label for a global
   1457 // symbol named name.
   1458 func localTargetName(name string) string {
   1459 	return ".L" + name + "_local_target"
   1460 }
   1461 
   1462 func localEntryName(name string) string {
   1463 	return ".L" + name + "_local_entry"
   1464 }
   1465 
   1466 func isSynthesized(symbol string) bool {
   1467 	return strings.HasSuffix(symbol, "_bss_get") ||
   1468 		symbol == "OPENSSL_ia32cap_get" ||
   1469 		strings.HasPrefix(symbol, "BORINGSSL_bcm_text_")
   1470 }
   1471 
   1472 func redirectorName(symbol string) string {
   1473 	return "bcm_redirector_" + symbol
   1474 }
   1475 
   1476 // sectionType returns the type of a section. I.e. a section called .text.foo
   1477 // is a .text section.
   1478 func sectionType(section string) (string, bool) {
   1479 	if len(section) == 0 || section[0] != '.' {
   1480 		return "", false
   1481 	}
   1482 
   1483 	i := strings.Index(section[1:], ".")
   1484 	if i != -1 {
   1485 		section = section[:i+1]
   1486 	}
   1487 
   1488 	if strings.HasPrefix(section, ".debug_") {
   1489 		return ".debug", true
   1490 	}
   1491 
   1492 	return section, true
   1493 }
   1494 
   1495 // accessorName returns the name of the accessor function for a BSS symbol
   1496 // named name.
   1497 func accessorName(name string) string {
   1498 	return name + "_bss_get"
   1499 }
   1500 
   1501 func (d *delocation) mapLocalSymbol(symbol string) string {
   1502 	if d.currentInput.index == 0 {
   1503 		return symbol
   1504 	}
   1505 	return symbol + "_BCM_" + strconv.Itoa(d.currentInput.index)
   1506 }
   1507 
   1508 func detectProcessor(input inputFile) processorType {
   1509 	for statement := input.ast.up; statement != nil; statement = statement.next {
   1510 		node := skipNodes(statement.up, ruleWS)
   1511 		if node == nil || node.pegRule != ruleInstruction {
   1512 			continue
   1513 		}
   1514 
   1515 		instruction := node.up
   1516 		instructionName := input.contents[instruction.begin:instruction.end]
   1517 
   1518 		switch instructionName {
   1519 		case "movq", "call", "leaq":
   1520 			return x86_64
   1521 		case "addis", "addi", "mflr":
   1522 			return ppc64le
   1523 		}
   1524 	}
   1525 
   1526 	panic("processed entire input and didn't recognise any instructions.")
   1527 }
   1528 
   1529 func sortedSet(m map[string]struct{}) []string {
   1530 	ret := make([]string, 0, len(m))
   1531 	for key := range m {
   1532 		ret = append(ret, key)
   1533 	}
   1534 	sort.Strings(ret)
   1535 	return ret
   1536 }
   1537