// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest are on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// http://msdn.microsoft.com/en-us/library/ms235286.aspx
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

// func runtime·raceread(addr uintptr)
// Called from instrumented code.
TEXT	runtime·raceread(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
TEXT	runtime·racewrite(SB), NOSPLIT, $0-8
	MOVQ	addr+0(FP), RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)
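// For orientation: for a memory access such as
//	x = *p
// the compiler's race instrumentation conceptually emits (a sketch, not
// literal compiler output):
//	runtime·raceread(uintptr(unsafe.Pointer(p)))
//	x = *p
// so the thunks above receive addr in their argument slot and pick up the
// caller pc directly from (SP).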
// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racereadrange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
TEXT	runtime·racewriterange(SB), NOSPLIT, $0-16
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. The other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX // w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	DX, R15 // save function entry context (for closures)
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	MOVQ	callpc+0(FP), RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves R15
	CALL	racecall<>(SB)
	MOVQ	R15, DX // restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)
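// The address filter in racecalladdr<> above corresponds to the following
// Go-like pseudocode (a sketch only; no such function exists):
//	if (racearenastart <= addr && addr < racearenaend) ||
//		(racedatastart <= addr && addr < racedataend) {
//		racecall(fn, racectx, addr, pc)
//	}
// Addresses outside the heap arena and the data/bss ranges have no tsan
// shadow, so those accesses are silently skipped.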
// Atomic operations for sync/atomic package.

// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX // convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX // convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·AddInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-0
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-0
	JMP	sync∕atomic·CompareAndSwapInt64(SB)
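// Note on the Add thunks above: sync/atomic.AddInt{32,64} must return the
// new value, while tsan's fetch_add primitives return the old one. The two
// instructions after the CALL perform, in effect (a sketch):
//	ret = __tsan_go_atomic_fetch_add(addr, delta) + delta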
// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12
	MOVL	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	MOVQ	8(SP), RARG1 // caller pc
	MOVQ	(SP), RARG2 // pc
	LEAQ	16(SP), RARG3 // arguments
	JMP	racecall<>(SB) // does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, R15 // remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g(R12), R14
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	CALL	racecall<>(SB)
	MOVQ	R15, AX // restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	MOVQ	8(SP), RARG1 // caller pc
	MOVQ	(SP), RARG2 // pc
	LEAQ	16(SP), RARG3 // arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0 // goroutine context
	JMP	racecall<>(SB)

// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)

// Switches SP to the g0 stack and calls (AX). Arguments are already set.
TEXT	racecall<>(SB), NOSPLIT, $0-0
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12 // callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call // already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP // alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	RET

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// A direct Go->C race call only switches SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
TEXT	runtime·racesymbolizethunk(SB), NOSPLIT, $56-8
	// Save callee-saved registers (Go code won't respect that).
	// This is a superset of the darwin/linux/windows registers.
	PUSHQ	BX
	PUSHQ	BP
	PUSHQ	DI
	PUSHQ	SI
	PUSHQ	R12
	PUSHQ	R13
	PUSHQ	R14
	PUSHQ	R15
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_g0(R13), R14
	MOVQ	R14, g(R12) // g = m->g0
	PUSHQ	RARG0 // func arg
	CALL	runtime·racesymbolize(SB)
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12) // g = m->curg
	// Restore callee-saved registers.
	POPQ	R15
	POPQ	R14
	POPQ	R13
	POPQ	R12
	POPQ	SI
	POPQ	DI
	POPQ	BP
	POPQ	BX
	RET
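// For reference, the stack discipline of racecall<> above in Go-like
// pseudocode (a sketch only; no such function exists):
//	saved := SP
//	if g != m->g0 {
//		SP = m->g0->sched.sp // run the C race runtime on the g0 stack
//	}
//	SP &^= 15 // 16-byte alignment required by the gcc ABI
//	fn(RARG0, RARG1, RARG2, RARG3) // fn is in AX
//	SP = saved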