// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race
// dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// http://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)
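
// For orientation, a hedged Go-level sketch (not part of this file) of what
// the compiler's race instrumentation conceptually emits around a plain
// memory access when building with -race; the exact form of the emitted
// calls is illustrative only:
//
//	// v := *p becomes roughly:
//	runtime.raceread(uintptr(unsafe.Pointer(p)))
//	v := *p
//
//	// *p = v becomes roughly:
//	runtime.racewrite(uintptr(unsafe.Pointer(p)))
//	*p = v
//
// The thunks above then forward these calls to __tsan_read/__tsan_write,
// passing the caller pc read from (SP).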

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX // w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenterfp(fp uintptr)
// Called from instrumented code.
// Like racefuncenter but passes FP, not PC.
TEXT	runtime·racefuncenterfp(SB), NOSPLIT, $0-8
	MOVQ	fp+0(FP), R11
	MOVQ	-8(R11), R11
	JMP	racefuncenter<>(SB)
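
// Hedged Go-level sketch (not part of this file): to let tsan maintain a
// shadow call stack for race reports, the compiler conceptually brackets
// every instrumented function body with the enter/exit pair below; the
// exact form of the emitted code is illustrative only:
//
//	func F() {
//		runtime.racefuncenter(pc) // pc = caller's return address (or FP via racefuncenterfp)
//		// ... function body ...
//		runtime.racefuncexit() // emitted before each return
//	}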

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter/racefuncenterfp.
// R11 = caller's return address.
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	MOVQ	DX, R15 // save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX // restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX // convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX // convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)
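
// Hedged Go-level sketch (not part of this file) of the fetch_add -> add_fetch
// conversion performed above: __tsan_go_atomic*_fetch_add writes the *old*
// value into the result slot, while sync/atomic.Add* must return the *new*
// value, so the thunks add the delta to the result once more; names below
// are illustrative:
//
//	old := tsanFetchAdd(addr, delta) // what the tsan call leaves in ret+16(FP)
//	return old + delta               // what AddInt32/AddInt64 must return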

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	MOVQ	8(SP), RARG1 // caller pc
	MOVQ	(SP), RARG2 // pc
	LEAQ	16(SP), RARG3 // arguments
	JMP	racecall<>(SB) // does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, R15 // remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX // restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	MOVQ	8(SP), RARG1 // caller pc
	MOVQ	(SP), RARG2 // pc
	LEAQ	16(SP), RARG3 // arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12 // callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call // already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP // alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET
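
// Hedged C-like sketch (not part of this file) of what racecall<> does: the
// gcc-compiled race runtime expects a large, 16-byte-aligned C stack, so the
// call is made from the g0 system stack; names are illustrative:
//
//	saved = SP;
//	if (g != m->g0)
//		SP = m->g0->sched.sp; // switch to the system stack
//	SP &= ~15;                    // alignment required by the gcc ABI
//	(*AX)();                      // arguments already in RARG0-RARG3
//	SP = saved;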

// C->Go callback thunk that allows calling runtime·racecallback from C code.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT, $56-8
	// Handle command raceGetProcCmd (0) here.
	// First, code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_racectx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Save callee-saved registers (Go code won't respect that).
	// This is a superset of the darwin/linux/windows callee-saved registers.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_g0(R13), R14
	MOVQ	R14, g(R12) // g = m->g0
	PUSHQ	RARG1 // func arg
	PUSHQ	RARG0 // func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12) // g = m->curg
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET
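
// Hedged Go-level sketch (not part of this file) of the callback that this
// thunk forwards to; the real implementation lives in the runtime's race
// support code (race.go), and the command constants below are assumptions
// based on the command dispatch described above:
//
//	//go:nosplit
//	func racecallback(cmd uintptr, ctx unsafe.Pointer) {
//		switch cmd {
//		case raceGetProcCmd:
//			throw("should have been handled by racecallbackthunk")
//		case raceSymbolizeCodeCmd:
//			raceSymbolizeCode((*symbolizeCodeContext)(ctx))
//		case raceSymbolizeDataCmd:
//			raceSymbolizeData((*symbolizeDataContext)(ctx))
//		default:
//			throw("unknown command")
//		}
//	}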