1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt) 4 * 5 * There are examples in here of: 6 * * how to set protection keys on memory 7 * * how to set/clear bits in PKRU (the rights register) 8 * * how to handle SEGV_PKRU signals and extract pkey-relevant 9 * information from the siginfo 10 * 11 * Things to add: 12 * make sure KSM and KSM COW breaking works 13 * prefault pages in at malloc, or not 14 * protect MPX bounds tables with protection keys? 15 * make sure VMA splitting/merging is working correctly 16 * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys 17 * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel 18 * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks 19 * 20 * Compile like this: 21 * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm 22 * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm 23 */ 24 #define _GNU_SOURCE 25 #include <errno.h> 26 #include <linux/futex.h> 27 #include <sys/time.h> 28 #include <sys/syscall.h> 29 #include <string.h> 30 #include <stdio.h> 31 #include <stdint.h> 32 #include <stdbool.h> 33 #include <signal.h> 34 #include <assert.h> 35 #include <stdlib.h> 36 #include <ucontext.h> 37 #include <sys/mman.h> 38 #include <sys/types.h> 39 #include <sys/wait.h> 40 #include <sys/stat.h> 41 #include <fcntl.h> 42 #include <unistd.h> 43 #include <sys/ptrace.h> 44 #include <setjmp.h> 45 46 #include "pkey-helpers.h" 47 48 int iteration_nr = 1; 49 int test_nr; 50 51 unsigned int shadow_pkru; 52 53 #define HPAGE_SIZE (1UL<<21) 54 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) 55 #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) 56 #define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) 57 #define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) 58 #define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) 59 #define __stringify_1(x...) #x 60 #define __stringify(x...) __stringify_1(x) 61 62 #define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) 63 64 int dprint_in_signal; 65 char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; 66 67 extern void abort_hooks(void); 68 #define pkey_assert(condition) do { \ 69 if (!(condition)) { \ 70 dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ 71 __FILE__, __LINE__, \ 72 test_nr, iteration_nr); \ 73 dprintf0("errno at assert: %d", errno); \ 74 abort_hooks(); \ 75 assert(condition); \ 76 } \ 77 } while (0) 78 #define raw_assert(cond) assert(cond) 79 80 void cat_into_file(char *str, char *file) 81 { 82 int fd = open(file, O_RDWR); 83 int ret; 84 85 dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); 86 /* 87 * these need to be raw because they are called under 88 * pkey_assert() 89 */ 90 raw_assert(fd >= 0); 91 ret = write(fd, str, strlen(str)); 92 if (ret != strlen(str)) { 93 perror("write to file failed"); 94 fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); 95 raw_assert(0); 96 } 97 close(fd); 98 } 99 100 #if CONTROL_TRACING > 0 101 static int warned_tracing; 102 int tracing_root_ok(void) 103 { 104 if (geteuid() != 0) { 105 if (!warned_tracing) 106 fprintf(stderr, "WARNING: not run as root, " 107 "can not do tracing control\n"); 108 warned_tracing = 1; 109 return 0; 110 } 111 return 1; 112 } 113 #endif 114 115 void tracing_on(void) 116 { 117 #if CONTROL_TRACING > 0 118 #define TRACEDIR "/sys/kernel/debug/tracing" 119 char pidstr[32]; 120 121 if (!tracing_root_ok()) 122 return; 123 124 sprintf(pidstr, "%d", getpid()); 125 cat_into_file("0", TRACEDIR "/tracing_on"); 126 cat_into_file("\n", TRACEDIR "/trace"); 127 if (1) { 128 cat_into_file("function_graph", TRACEDIR "/current_tracer"); 129 cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); 130 } else { 131 cat_into_file("nop", TRACEDIR "/current_tracer"); 132 } 133 cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid"); 134 cat_into_file("1", TRACEDIR "/tracing_on"); 135 dprintf1("enabled tracing\n"); 136 #endif 137 } 138 139 void tracing_off(void) 140 { 141 #if CONTROL_TRACING > 0 142 if (!tracing_root_ok()) 143 return; 144 cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on"); 145 #endif 146 } 147 148 void abort_hooks(void) 149 { 150 fprintf(stderr, "running %s()...\n", __func__); 151 tracing_off(); 152 #ifdef SLEEP_ON_ABORT 153 sleep(SLEEP_ON_ABORT); 154 #endif 155 } 156 157 static inline void __page_o_noops(void) 158 { 159 /* 8-bytes of instruction * 512 bytes = 1 page */ 160 asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); 161 } 162 163 /* 164 * This attempts to have roughly a page of instructions followed by a few 165 * instructions that do a write, and another page of instructions. That 166 * way, we are pretty sure that the write is in the second page of 167 * instructions and has at least a page of padding behind it. 168 * 169 * *That* lets us be sure to madvise() away the write instruction, which 170 * will then fault, which makes sure that the fault code handles 171 * execute-only memory properly. 172 */ 173 __attribute__((__aligned__(PAGE_SIZE))) 174 void lots_o_noops_around_write(int *write_to_me) 175 { 176 dprintf3("running %s()\n", __func__); 177 __page_o_noops(); 178 /* Assume this happens in the second page of instructions: */ 179 *write_to_me = __LINE__; 180 /* pad out by another page: */ 181 __page_o_noops(); 182 dprintf3("%s() done\n", __func__); 183 } 184 185 /* Define some kernel-like types */ 186 #define u8 uint8_t 187 #define u16 uint16_t 188 #define u32 uint32_t 189 #define u64 uint64_t 190 191 #ifdef __i386__ 192 193 #ifndef SYS_mprotect_key 194 # define SYS_mprotect_key 380 195 #endif 196 #ifndef SYS_pkey_alloc 197 # define SYS_pkey_alloc 381 198 # define SYS_pkey_free 382 199 #endif 200 #define REG_IP_IDX REG_EIP 201 #define si_pkey_offset 0x14 202 203 #else 204 205 #ifndef SYS_mprotect_key 206 # define SYS_mprotect_key 329 207 #endif 208 #ifndef SYS_pkey_alloc 209 # define SYS_pkey_alloc 330 210 # define SYS_pkey_free 331 211 #endif 212 #define REG_IP_IDX REG_RIP 213 #define si_pkey_offset 0x20 214 215 #endif 216 217 void dump_mem(void *dumpme, int len_bytes) 218 { 219 char *c = (void *)dumpme; 220 int i; 221 222 for (i = 0; i < len_bytes; i += sizeof(u64)) { 223 u64 *ptr = (u64 *)(c + i); 224 dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); 225 } 226 } 227 228 #define SEGV_BNDERR 3 /* failed address bound checks */ 229 #define SEGV_PKUERR 4 230 231 static char *si_code_str(int si_code) 232 { 233 if (si_code == SEGV_MAPERR) 234 return "SEGV_MAPERR"; 235 if (si_code == SEGV_ACCERR) 236 return "SEGV_ACCERR"; 237 if (si_code == SEGV_BNDERR) 238 return "SEGV_BNDERR"; 239 if (si_code == SEGV_PKUERR) 240 return "SEGV_PKUERR"; 241 return "UNKNOWN"; 242 } 243 244 int pkru_faults; 245 int last_si_pkey = -1; 246 void signal_handler(int signum, siginfo_t *si, void *vucontext) 247 { 248 ucontext_t *uctxt = vucontext; 249 int trapno; 250 unsigned long ip; 251 char *fpregs; 252 u32 *pkru_ptr; 253 u64 siginfo_pkey; 254 u32 *si_pkey_ptr; 255 int pkru_offset; 256 fpregset_t fpregset; 257 258 dprint_in_signal = 1; 259 dprintf1(">>>>===============SIGSEGV============================\n"); 260 dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, 261 __rdpkru(), shadow_pkru); 262 263 trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; 264 ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; 265 fpregset = uctxt->uc_mcontext.fpregs; 266 fpregs = (void *)fpregset; 267 268 dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, 269 trapno, ip, si_code_str(si->si_code), si->si_code); 270 #ifdef __i386__ 271 /* 272 * 32-bit has some extra padding so that userspace can tell whether 273 * the XSTATE header is present in addition to the "legacy" FPU 274 * state. We just assume that it is here. 275 */ 276 fpregs += 0x70; 277 #endif 278 pkru_offset = pkru_xstate_offset(); 279 pkru_ptr = (void *)(&fpregs[pkru_offset]); 280 281 dprintf1("siginfo: %p\n", si); 282 dprintf1(" fpregs: %p\n", fpregs); 283 /* 284 * If we got a PKRU fault, we *HAVE* to have at least one bit set in 285 * here. 286 */ 287 dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); 288 if (DEBUG_LEVEL > 4) 289 dump_mem(pkru_ptr - 128, 256); 290 pkey_assert(*pkru_ptr); 291 292 si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); 293 dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); 294 dump_mem(si_pkey_ptr - 8, 24); 295 siginfo_pkey = *si_pkey_ptr; 296 pkey_assert(siginfo_pkey < NR_PKEYS); 297 last_si_pkey = siginfo_pkey; 298 299 if ((si->si_code == SEGV_MAPERR) || 300 (si->si_code == SEGV_ACCERR) || 301 (si->si_code == SEGV_BNDERR)) { 302 printf("non-PK si_code, exiting...\n"); 303 exit(4); 304 } 305 306 dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); 307 /* need __rdpkru() version so we do not do shadow_pkru checking */ 308 dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); 309 dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); 310 *(u64 *)pkru_ptr = 0x00000000; 311 dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); 312 pkru_faults++; 313 dprintf1("<<<<==================================================\n"); 314 return; 315 if (trapno == 14) { 316 fprintf(stderr, 317 "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", 318 trapno, ip); 319 fprintf(stderr, "si_addr %p\n", si->si_addr); 320 fprintf(stderr, "REG_ERR: %lx\n", 321 (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); 322 exit(1); 323 } else { 324 fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); 325 fprintf(stderr, "si_addr %p\n", si->si_addr); 326 fprintf(stderr, "REG_ERR: %lx\n", 327 (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); 328 exit(2); 329 } 330 dprint_in_signal = 0; 331 } 332 333 int wait_all_children(void) 334 { 335 int status; 336 return waitpid(-1, &status, 0); 337 } 338 339 void sig_chld(int x) 340 { 341 dprint_in_signal = 1; 342 dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); 343 dprint_in_signal = 0; 344 } 345 346 void setup_sigsegv_handler(void) 347 { 348 int r, rs; 349 struct sigaction newact; 350 struct sigaction oldact; 351 352 /* #PF is mapped to sigsegv */ 353 int signum = SIGSEGV; 354 355 newact.sa_handler = 0; 356 newact.sa_sigaction = signal_handler; 357 358 /*sigset_t - signals to block while in the handler */ 359 /* get the old signal mask. */ 360 rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); 361 pkey_assert(rs == 0); 362 363 /* call sa_sigaction, not sa_handler*/ 364 newact.sa_flags = SA_SIGINFO; 365 366 newact.sa_restorer = 0; /* void(*)(), obsolete */ 367 r = sigaction(signum, &newact, &oldact); 368 r = sigaction(SIGALRM, &newact, &oldact); 369 pkey_assert(r == 0); 370 } 371 372 void setup_handlers(void) 373 { 374 signal(SIGCHLD, &sig_chld); 375 setup_sigsegv_handler(); 376 } 377 378 pid_t fork_lazy_child(void) 379 { 380 pid_t forkret; 381 382 forkret = fork(); 383 pkey_assert(forkret >= 0); 384 dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); 385 386 if (!forkret) { 387 /* in the child */ 388 while (1) { 389 dprintf1("child sleeping...\n"); 390 sleep(30); 391 } 392 } 393 return forkret; 394 } 395 396 void davecmp(void *_a, void *_b, int len) 397 { 398 int i; 399 unsigned long *a = _a; 400 unsigned long *b = _b; 401 402 for (i = 0; i < len / sizeof(*a); i++) { 403 if (a[i] == b[i]) 404 continue; 405 406 dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]); 407 } 408 } 409 410 void dumpit(char *f) 411 { 412 int fd = open(f, O_RDONLY); 413 char buf[100]; 414 int nr_read; 415 416 dprintf2("maps fd: %d\n", fd); 417 do { 418 nr_read = read(fd, &buf[0], sizeof(buf)); 419 write(1, buf, nr_read); 420 } while (nr_read > 0); 421 close(fd); 422 } 423 424 #define PKEY_DISABLE_ACCESS 0x1 425 #define PKEY_DISABLE_WRITE 0x2 426 427 u32 pkey_get(int pkey, unsigned long flags) 428 { 429 u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); 430 u32 pkru = __rdpkru(); 431 u32 shifted_pkru; 432 u32 masked_pkru; 433 434 dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", 435 __func__, pkey, flags, 0, 0); 436 dprintf2("%s() raw pkru: %x\n", __func__, pkru); 437 438 shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); 439 dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); 440 masked_pkru = shifted_pkru & mask; 441 dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); 442 /* 443 * shift down the relevant bits to the lowest two, then 444 * mask off all the other high bits. 445 */ 446 return masked_pkru; 447 } 448 449 int pkey_set(int pkey, unsigned long rights, unsigned long flags) 450 { 451 u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); 452 u32 old_pkru = __rdpkru(); 453 u32 new_pkru; 454 455 /* make sure that 'rights' only contains the bits we expect: */ 456 assert(!(rights & ~mask)); 457 458 /* copy old pkru */ 459 new_pkru = old_pkru; 460 /* mask out bits from pkey in old value: */ 461 new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); 462 /* OR in new bits for pkey: */ 463 new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); 464 465 __wrpkru(new_pkru); 466 467 dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", 468 __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); 469 return 0; 470 } 471 472 void pkey_disable_set(int pkey, int flags) 473 { 474 unsigned long syscall_flags = 0; 475 int ret; 476 int pkey_rights; 477 u32 orig_pkru = rdpkru(); 478 479 dprintf1("START->%s(%d, 0x%x)\n", __func__, 480 pkey, flags); 481 pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 482 483 pkey_rights = pkey_get(pkey, syscall_flags); 484 485 dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, 486 pkey, pkey, pkey_rights); 487 pkey_assert(pkey_rights >= 0); 488 489 pkey_rights |= flags; 490 491 ret = pkey_set(pkey, pkey_rights, syscall_flags); 492 assert(!ret); 493 /*pkru and flags have the same format */ 494 shadow_pkru |= flags << (pkey * 2); 495 dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); 496 497 pkey_assert(ret >= 0); 498 499 pkey_rights = pkey_get(pkey, syscall_flags); 500 dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, 501 pkey, pkey, pkey_rights); 502 503 dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); 504 if (flags) 505 pkey_assert(rdpkru() > orig_pkru); 506 dprintf1("END<---%s(%d, 0x%x)\n", __func__, 507 pkey, flags); 508 } 509 510 void pkey_disable_clear(int pkey, int flags) 511 { 512 unsigned long syscall_flags = 0; 513 int ret; 514 int pkey_rights = pkey_get(pkey, syscall_flags); 515 u32 orig_pkru = rdpkru(); 516 517 pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); 518 519 dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, 520 pkey, pkey, pkey_rights); 521 pkey_assert(pkey_rights >= 0); 522 523 pkey_rights |= flags; 524 525 ret = pkey_set(pkey, pkey_rights, 0); 526 /* pkru and flags have the same format */ 527 shadow_pkru &= ~(flags << (pkey * 2)); 528 pkey_assert(ret >= 0); 529 530 pkey_rights = pkey_get(pkey, syscall_flags); 531 dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, 532 pkey, pkey, pkey_rights); 533 534 dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); 535 if (flags) 536 assert(rdpkru() > orig_pkru); 537 } 538 539 void pkey_write_allow(int pkey) 540 { 541 pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); 542 } 543 void pkey_write_deny(int pkey) 544 { 545 pkey_disable_set(pkey, PKEY_DISABLE_WRITE); 546 } 547 void pkey_access_allow(int pkey) 548 { 549 pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); 550 } 551 void pkey_access_deny(int pkey) 552 { 553 pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); 554 } 555 556 int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, 557 unsigned long pkey) 558 { 559 int sret; 560 561 dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, 562 ptr, size, orig_prot, pkey); 563 564 errno = 0; 565 sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); 566 if (errno) { 567 dprintf2("SYS_mprotect_key sret: %d\n", sret); 568 dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); 569 dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); 570 if (DEBUG_LEVEL >= 2) 571 perror("SYS_mprotect_pkey"); 572 } 573 return sret; 574 } 575 576 int sys_pkey_alloc(unsigned long flags, unsigned long init_val) 577 { 578 int ret = syscall(SYS_pkey_alloc, flags, init_val); 579 dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", 580 __func__, flags, init_val, ret, errno); 581 return ret; 582 } 583 584 int alloc_pkey(void) 585 { 586 int ret; 587 unsigned long init_val = 0x0; 588 589 dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", 590 __LINE__, __rdpkru(), shadow_pkru); 591 ret = sys_pkey_alloc(0, init_val); 592 /* 593 * pkey_alloc() sets PKRU, so we need to reflect it in 594 * shadow_pkru: 595 */ 596 dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", 597 __LINE__, ret, __rdpkru(), shadow_pkru); 598 if (ret) { 599 /* clear both the bits: */ 600 shadow_pkru &= ~(0x3 << (ret * 2)); 601 dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", 602 __LINE__, ret, __rdpkru(), shadow_pkru); 603 /* 604 * move the new state in from init_val 605 * (remember, we cheated and init_val == pkru format) 606 */ 607 shadow_pkru |= (init_val << (ret * 2)); 608 } 609 dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", 610 __LINE__, ret, __rdpkru(), shadow_pkru); 611 dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); 612 /* for shadow checking: */ 613 rdpkru(); 614 dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", 615 __LINE__, ret, __rdpkru(), shadow_pkru); 616 return ret; 617 } 618 619 int sys_pkey_free(unsigned long pkey) 620 { 621 int ret = syscall(SYS_pkey_free, pkey); 622 dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); 623 return ret; 624 } 625 626 /* 627 * I had a bug where pkey bits could be set by mprotect() but 628 * not cleared. This ensures we get lots of random bit sets 629 * and clears on the vma and pte pkey bits. 630 */ 631 int alloc_random_pkey(void) 632 { 633 int max_nr_pkey_allocs; 634 int ret; 635 int i; 636 int alloced_pkeys[NR_PKEYS]; 637 int nr_alloced = 0; 638 int random_index; 639 memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); 640 641 /* allocate every possible key and make a note of which ones we got */ 642 max_nr_pkey_allocs = NR_PKEYS; 643 max_nr_pkey_allocs = 1; 644 for (i = 0; i < max_nr_pkey_allocs; i++) { 645 int new_pkey = alloc_pkey(); 646 if (new_pkey < 0) 647 break; 648 alloced_pkeys[nr_alloced++] = new_pkey; 649 } 650 651 pkey_assert(nr_alloced > 0); 652 /* select a random one out of the allocated ones */ 653 random_index = rand() % nr_alloced; 654 ret = alloced_pkeys[random_index]; 655 /* now zero it out so we don't free it next */ 656 alloced_pkeys[random_index] = 0; 657 658 /* go through the allocated ones that we did not want and free them */ 659 for (i = 0; i < nr_alloced; i++) { 660 int free_ret; 661 if (!alloced_pkeys[i]) 662 continue; 663 free_ret = sys_pkey_free(alloced_pkeys[i]); 664 pkey_assert(!free_ret); 665 } 666 dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, 667 __LINE__, ret, __rdpkru(), shadow_pkru); 668 return ret; 669 } 670 671 int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, 672 unsigned long pkey) 673 { 674 int nr_iterations = random() % 100; 675 int ret; 676 677 while (0) { 678 int rpkey = alloc_random_pkey(); 679 ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); 680 dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", 681 ptr, size, orig_prot, pkey, ret); 682 if (nr_iterations-- < 0) 683 break; 684 685 dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, 686 __LINE__, ret, __rdpkru(), shadow_pkru); 687 sys_pkey_free(rpkey); 688 dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, 689 __LINE__, ret, __rdpkru(), shadow_pkru); 690 } 691 pkey_assert(pkey < NR_PKEYS); 692 693 ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); 694 dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", 695 ptr, size, orig_prot, pkey, ret); 696 pkey_assert(!ret); 697 dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, 698 __LINE__, ret, __rdpkru(), shadow_pkru); 699 return ret; 700 } 701 702 struct pkey_malloc_record { 703 void *ptr; 704 long size; 705 }; 706 struct pkey_malloc_record *pkey_malloc_records; 707 long nr_pkey_malloc_records; 708 void record_pkey_malloc(void *ptr, long size) 709 { 710 long i; 711 struct pkey_malloc_record *rec = NULL; 712 713 for (i = 0; i < nr_pkey_malloc_records; i++) { 714 rec = &pkey_malloc_records[i]; 715 /* find a free record */ 716 if (rec) 717 break; 718 } 719 if (!rec) { 720 /* every record is full */ 721 size_t old_nr_records = nr_pkey_malloc_records; 722 size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); 723 size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); 724 dprintf2("new_nr_records: %zd\n", new_nr_records); 725 dprintf2("new_size: %zd\n", new_size); 726 pkey_malloc_records = realloc(pkey_malloc_records, new_size); 727 pkey_assert(pkey_malloc_records != NULL); 728 rec = &pkey_malloc_records[nr_pkey_malloc_records]; 729 /* 730 * realloc() does not initialize memory, so zero it from 731 * the first new record all the way to the end. 732 */ 733 for (i = 0; i < new_nr_records - old_nr_records; i++) 734 memset(rec + i, 0, sizeof(*rec)); 735 } 736 dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", 737 (int)(rec - pkey_malloc_records), rec, ptr, size); 738 rec->ptr = ptr; 739 rec->size = size; 740 nr_pkey_malloc_records++; 741 } 742 743 void free_pkey_malloc(void *ptr) 744 { 745 long i; 746 int ret; 747 dprintf3("%s(%p)\n", __func__, ptr); 748 for (i = 0; i < nr_pkey_malloc_records; i++) { 749 struct pkey_malloc_record *rec = &pkey_malloc_records[i]; 750 dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", 751 ptr, i, rec, rec->ptr, rec->size); 752 if ((ptr < rec->ptr) || 753 (ptr >= rec->ptr + rec->size)) 754 continue; 755 756 dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", 757 ptr, i, rec, rec->ptr, rec->size); 758 nr_pkey_malloc_records--; 759 ret = munmap(rec->ptr, rec->size); 760 dprintf3("munmap ret: %d\n", ret); 761 pkey_assert(!ret); 762 dprintf3("clearing rec->ptr, rec: %p\n", rec); 763 rec->ptr = NULL; 764 dprintf3("done clearing rec->ptr, rec: %p\n", rec); 765 return; 766 } 767 pkey_assert(false); 768 } 769 770 771 void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) 772 { 773 void *ptr; 774 int ret; 775 776 rdpkru(); 777 dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 778 size, prot, pkey); 779 pkey_assert(pkey < NR_PKEYS); 780 ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 781 pkey_assert(ptr != (void *)-1); 782 ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); 783 pkey_assert(!ret); 784 record_pkey_malloc(ptr, size); 785 rdpkru(); 786 787 dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); 788 return ptr; 789 } 790 791 void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) 792 { 793 int ret; 794 void *ptr; 795 796 dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 797 size, prot, pkey); 798 /* 799 * Guarantee we can fit at least one huge page in the resulting 800 * allocation by allocating space for 2: 801 */ 802 size = ALIGN_UP(size, HPAGE_SIZE * 2); 803 ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 804 pkey_assert(ptr != (void *)-1); 805 record_pkey_malloc(ptr, size); 806 mprotect_pkey(ptr, size, prot, pkey); 807 808 dprintf1("unaligned ptr: %p\n", ptr); 809 ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); 810 dprintf1(" aligned ptr: %p\n", ptr); 811 ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); 812 dprintf1("MADV_HUGEPAGE ret: %d\n", ret); 813 ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); 814 dprintf1("MADV_WILLNEED ret: %d\n", ret); 815 memset(ptr, 0, HPAGE_SIZE); 816 817 dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); 818 return ptr; 819 } 820 821 int hugetlb_setup_ok; 822 #define GET_NR_HUGE_PAGES 10 823 void setup_hugetlbfs(void) 824 { 825 int err; 826 int fd; 827 char buf[] = "123"; 828 829 if (geteuid() != 0) { 830 fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); 831 return; 832 } 833 834 cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); 835 836 /* 837 * Now go make sure that we got the pages and that they 838 * are 2M pages. Someone might have made 1G the default. 839 */ 840 fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); 841 if (fd < 0) { 842 perror("opening sysfs 2M hugetlb config"); 843 return; 844 } 845 846 /* -1 to guarantee leaving the trailing \0 */ 847 err = read(fd, buf, sizeof(buf)-1); 848 close(fd); 849 if (err <= 0) { 850 perror("reading sysfs 2M hugetlb config"); 851 return; 852 } 853 854 if (atoi(buf) != GET_NR_HUGE_PAGES) { 855 fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", 856 buf, GET_NR_HUGE_PAGES); 857 return; 858 } 859 860 hugetlb_setup_ok = 1; 861 } 862 863 void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) 864 { 865 void *ptr; 866 int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; 867 868 if (!hugetlb_setup_ok) 869 return PTR_ERR_ENOTSUP; 870 871 dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); 872 size = ALIGN_UP(size, HPAGE_SIZE * 2); 873 pkey_assert(pkey < NR_PKEYS); 874 ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); 875 pkey_assert(ptr != (void *)-1); 876 mprotect_pkey(ptr, size, prot, pkey); 877 878 record_pkey_malloc(ptr, size); 879 880 dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); 881 return ptr; 882 } 883 884 void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) 885 { 886 void *ptr; 887 int fd; 888 889 dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, 890 size, prot, pkey); 891 pkey_assert(pkey < NR_PKEYS); 892 fd = open("/dax/foo", O_RDWR); 893 pkey_assert(fd >= 0); 894 895 ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); 896 pkey_assert(ptr != (void *)-1); 897 898 mprotect_pkey(ptr, size, prot, pkey); 899 900 record_pkey_malloc(ptr, size); 901 902 dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); 903 close(fd); 904 return ptr; 905 } 906 907 void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { 908 909 malloc_pkey_with_mprotect, 910 malloc_pkey_anon_huge, 911 malloc_pkey_hugetlb 912 /* can not do direct with the pkey_mprotect() API: 913 malloc_pkey_mmap_direct, 914 malloc_pkey_mmap_dax, 915 */ 916 }; 917 918 void *malloc_pkey(long size, int prot, u16 pkey) 919 { 920 void *ret; 921 static int malloc_type; 922 int nr_malloc_types = ARRAY_SIZE(pkey_malloc); 923 924 pkey_assert(pkey < NR_PKEYS); 925 926 while (1) { 927 pkey_assert(malloc_type < nr_malloc_types); 928 929 ret = pkey_malloc[malloc_type](size, prot, pkey); 930 pkey_assert(ret != (void *)-1); 931 932 malloc_type++; 933 if (malloc_type >= nr_malloc_types) 934 malloc_type = (random()%nr_malloc_types); 935 936 /* try again if the malloc_type we tried is unsupported */ 937 if (ret == PTR_ERR_ENOTSUP) 938 continue; 939 940 break; 941 } 942 943 dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, 944 size, prot, pkey, ret); 945 return ret; 946 } 947 948 int last_pkru_faults; 949 void expected_pk_fault(int pkey) 950 { 951 dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", 952 __func__, last_pkru_faults, pkru_faults); 953 dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); 954 pkey_assert(last_pkru_faults + 1 == pkru_faults); 955 pkey_assert(last_si_pkey == pkey); 956 /* 957 * The signal handler shold have cleared out PKRU to let the 958 * test program continue. We now have to restore it. 959 */ 960 if (__rdpkru() != 0) 961 pkey_assert(0); 962 963 __wrpkru(shadow_pkru); 964 dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", 965 __func__, shadow_pkru); 966 last_pkru_faults = pkru_faults; 967 last_si_pkey = -1; 968 } 969 970 void do_not_expect_pk_fault(void) 971 { 972 pkey_assert(last_pkru_faults == pkru_faults); 973 } 974 975 int test_fds[10] = { -1 }; 976 int nr_test_fds; 977 void __save_test_fd(int fd) 978 { 979 pkey_assert(fd >= 0); 980 pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); 981 test_fds[nr_test_fds] = fd; 982 nr_test_fds++; 983 } 984 985 int get_test_read_fd(void) 986 { 987 int test_fd = open("/etc/passwd", O_RDONLY); 988 __save_test_fd(test_fd); 989 return test_fd; 990 } 991 992 void close_test_fds(void) 993 { 994 int i; 995 996 for (i = 0; i < nr_test_fds; i++) { 997 if (test_fds[i] < 0) 998 continue; 999 close(test_fds[i]); 1000 test_fds[i] = -1; 1001 } 1002 nr_test_fds = 0; 1003 } 1004 1005 #define barrier() __asm__ __volatile__("": : :"memory") 1006 __attribute__((noinline)) int read_ptr(int *ptr) 1007 { 1008 /* 1009 * Keep GCC from optimizing this away somehow 1010 */ 1011 barrier(); 1012 return *ptr; 1013 } 1014 1015 void test_read_of_write_disabled_region(int *ptr, u16 pkey) 1016 { 1017 int ptr_contents; 1018 1019 dprintf1("disabling write access to PKEY[1], doing read\n"); 1020 pkey_write_deny(pkey); 1021 ptr_contents = read_ptr(ptr); 1022 dprintf1("*ptr: %d\n", ptr_contents); 1023 dprintf1("\n"); 1024 } 1025 void test_read_of_access_disabled_region(int *ptr, u16 pkey) 1026 { 1027 int ptr_contents; 1028 1029 dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); 1030 rdpkru(); 1031 pkey_access_deny(pkey); 1032 ptr_contents = read_ptr(ptr); 1033 dprintf1("*ptr: %d\n", ptr_contents); 1034 expected_pk_fault(pkey); 1035 } 1036 void test_write_of_write_disabled_region(int *ptr, u16 pkey) 1037 { 1038 dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); 1039 pkey_write_deny(pkey); 1040 *ptr = __LINE__; 1041 expected_pk_fault(pkey); 1042 } 1043 void test_write_of_access_disabled_region(int *ptr, u16 pkey) 1044 { 1045 dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); 1046 pkey_access_deny(pkey); 1047 *ptr = __LINE__; 1048 expected_pk_fault(pkey); 1049 } 1050 void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) 1051 { 1052 int ret; 1053 int test_fd = get_test_read_fd(); 1054 1055 dprintf1("disabling access to PKEY[%02d], " 1056 "having kernel read() to buffer\n", pkey); 1057 pkey_access_deny(pkey); 1058 ret = read(test_fd, ptr, 1); 1059 dprintf1("read ret: %d\n", ret); 1060 pkey_assert(ret); 1061 } 1062 void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) 1063 { 1064 int ret; 1065 int test_fd = get_test_read_fd(); 1066 1067 pkey_write_deny(pkey); 1068 ret = read(test_fd, ptr, 100); 1069 dprintf1("read ret: %d\n", ret); 1070 if (ret < 0 && (DEBUG_LEVEL > 0)) 1071 perror("verbose read result (OK for this to be bad)"); 1072 pkey_assert(ret); 1073 } 1074 1075 void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) 1076 { 1077 int pipe_ret, vmsplice_ret; 1078 struct iovec iov; 1079 int pipe_fds[2]; 1080 1081 pipe_ret = pipe(pipe_fds); 1082 1083 pkey_assert(pipe_ret == 0); 1084 dprintf1("disabling access to PKEY[%02d], " 1085 "having kernel vmsplice from buffer\n", pkey); 1086 pkey_access_deny(pkey); 1087 iov.iov_base = ptr; 1088 iov.iov_len = PAGE_SIZE; 1089 vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); 1090 dprintf1("vmsplice() ret: %d\n", vmsplice_ret); 1091 pkey_assert(vmsplice_ret == -1); 1092 1093 close(pipe_fds[0]); 1094 close(pipe_fds[1]); 1095 } 1096 1097 void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) 1098 { 1099 int ignored = 0xdada; 1100 int futex_ret; 1101 int some_int = __LINE__; 1102 1103 dprintf1("disabling write to PKEY[%02d], " 1104 "doing futex gunk in buffer\n", pkey); 1105 *ptr = some_int; 1106 pkey_write_deny(pkey); 1107 futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, 1108 &ignored, ignored); 1109 if (DEBUG_LEVEL > 0) 1110 perror("futex"); 1111 dprintf1("futex() ret: %d\n", futex_ret); 1112 } 1113 1114 /* Assumes that all pkeys other than 'pkey' are unallocated */ 1115 void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) 1116 { 1117 int err; 1118 int i; 1119 1120 /* Note: 0 is the default pkey, so don't mess with it */ 1121 for (i = 1; i < NR_PKEYS; i++) { 1122 if (pkey == i) 1123 continue; 1124 1125 dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); 1126 err = sys_pkey_free(i); 1127 pkey_assert(err); 1128 1129 err = sys_pkey_free(i); 1130 pkey_assert(err); 1131 1132 err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); 1133 pkey_assert(err); 1134 } 1135 } 1136 1137 /* Assumes that all pkeys other than 'pkey' are unallocated */ 1138 void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) 1139 { 1140 int err; 1141 int bad_pkey = NR_PKEYS+99; 1142 1143 /* pass a known-invalid pkey in: */ 1144 err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); 1145 pkey_assert(err); 1146 } 1147 1148 /* Assumes that all pkeys other than 'pkey' are unallocated */ 1149 void test_pkey_alloc_exhaust(int *ptr, u16 pkey) 1150 { 1151 int err; 1152 int allocated_pkeys[NR_PKEYS] = {0}; 1153 int nr_allocated_pkeys = 0; 1154 int i; 1155 1156 for (i = 0; i < NR_PKEYS*2; i++) { 1157 int new_pkey; 1158 dprintf1("%s() alloc loop: %d\n", __func__, i); 1159 new_pkey = alloc_pkey(); 1160 dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, 1161 __LINE__, err, __rdpkru(), shadow_pkru); 1162 rdpkru(); /* for shadow checking */ 1163 dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); 1164 if ((new_pkey == -1) && (errno == ENOSPC)) { 1165 dprintf2("%s() failed to allocate pkey after %d tries\n", 1166 __func__, nr_allocated_pkeys); 1167 break; 1168 } 1169 pkey_assert(nr_allocated_pkeys < NR_PKEYS); 1170 allocated_pkeys[nr_allocated_pkeys++] = new_pkey; 1171 } 1172 1173 dprintf3("%s()::%d\n", __func__, __LINE__); 1174 1175 /* 1176 * ensure it did not reach the end of the loop without 1177 * failure: 1178 */ 1179 pkey_assert(i < NR_PKEYS*2); 1180 1181 /* 1182 * There are 16 pkeys supported in hardware. One is taken 1183 * up for the default (0) and another can be taken up by 1184 * an execute-only mapping. Ensure that we can allocate 1185 * at least 14 (16-2). 1186 */ 1187 pkey_assert(i >= NR_PKEYS-2); 1188 1189 for (i = 0; i < nr_allocated_pkeys; i++) { 1190 err = sys_pkey_free(allocated_pkeys[i]); 1191 pkey_assert(!err); 1192 rdpkru(); /* for shadow checking */ 1193 } 1194 } 1195 1196 void test_ptrace_of_child(int *ptr, u16 pkey) 1197 { 1198 __attribute__((__unused__)) int peek_result; 1199 pid_t child_pid; 1200 void *ignored = 0; 1201 long ret; 1202 int status; 1203 /* 1204 * This is the "control" for our little expermient. Make sure 1205 * we can always access it when ptracing. 1206 */ 1207 int *plain_ptr_unaligned = malloc(HPAGE_SIZE); 1208 int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); 1209 1210 /* 1211 * Fork a child which is an exact copy of this process, of course. 1212 * That means we can do all of our tests via ptrace() and then plain 1213 * memory access and ensure they work differently. 1214 */ 1215 child_pid = fork_lazy_child(); 1216 dprintf1("[%d] child pid: %d\n", getpid(), child_pid); 1217 1218 ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); 1219 if (ret) 1220 perror("attach"); 1221 dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); 1222 pkey_assert(ret != -1); 1223 ret = waitpid(child_pid, &status, WUNTRACED); 1224 if ((ret != child_pid) || !(WIFSTOPPED(status))) { 1225 fprintf(stderr, "weird waitpid result %ld stat %x\n", 1226 ret, status); 1227 pkey_assert(0); 1228 } 1229 dprintf2("waitpid ret: %ld\n", ret); 1230 dprintf2("waitpid status: %d\n", status); 1231 1232 pkey_access_deny(pkey); 1233 pkey_write_deny(pkey); 1234 1235 /* Write access, untested for now: 1236 ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); 1237 pkey_assert(ret != -1); 1238 dprintf1("poke at %p: %ld\n", peek_at, ret); 1239 */ 1240 1241 /* 1242 * Try to access the pkey-protected "ptr" via ptrace: 1243 */ 1244 ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); 1245 /* expect it to work, without an error: */ 1246 pkey_assert(ret != -1); 1247 /* Now access from the current task, and expect an exception: */ 1248 peek_result = read_ptr(ptr); 1249 expected_pk_fault(pkey); 1250 1251 /* 1252 * Try to access the NON-pkey-protected "plain_ptr" via ptrace: 1253 */ 1254 ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); 1255 /* expect it to work, without an error: */ 1256 pkey_assert(ret != -1); 1257 /* Now access from the current task, and expect NO exception: */ 1258 peek_result = read_ptr(plain_ptr); 1259 do_not_expect_pk_fault(); 1260 1261 ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); 1262 pkey_assert(ret != -1); 1263 1264 ret = kill(child_pid, SIGKILL); 1265 pkey_assert(ret != -1); 1266 1267 wait(&status); 1268 1269 free(plain_ptr_unaligned); 1270 } 1271 1272 void test_executing_on_unreadable_memory(int *ptr, u16 pkey) 1273 { 1274 void *p1; 1275 int scratch; 1276 int ptr_contents; 1277 int ret; 1278 1279 p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); 1280 dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); 1281 /* lots_o_noops_around_write should be page-aligned already */ 1282 assert(p1 == &lots_o_noops_around_write); 1283 1284 /* Point 'p1' at the *second* page of the function: */ 1285 p1 += PAGE_SIZE; 1286 1287 madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1288 lots_o_noops_around_write(&scratch); 1289 ptr_contents = read_ptr(p1); 1290 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1291 1292 ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); 1293 pkey_assert(!ret); 1294 pkey_access_deny(pkey); 1295 1296 dprintf2("pkru: %x\n", rdpkru()); 1297 1298 /* 1299 * Make sure this is an *instruction* fault 1300 */ 1301 madvise(p1, PAGE_SIZE, MADV_DONTNEED); 1302 lots_o_noops_around_write(&scratch); 1303 do_not_expect_pk_fault(); 1304 ptr_contents = read_ptr(p1); 1305 dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); 1306 expected_pk_fault(pkey); 1307 } 1308 1309 void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) 1310 { 1311 int size = PAGE_SIZE; 1312 int sret; 1313 1314 if (cpu_has_pku()) { 1315 dprintf1("SKIP: %s: no CPU support\n", __func__); 1316 return; 1317 } 1318 1319 sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); 1320 pkey_assert(sret < 0); 1321 } 1322 1323 void (*pkey_tests[])(int *ptr, u16 pkey) = { 1324 test_read_of_write_disabled_region, 1325 test_read_of_access_disabled_region, 1326 test_write_of_write_disabled_region, 1327 test_write_of_access_disabled_region, 1328 test_kernel_write_of_access_disabled_region, 1329 test_kernel_write_of_write_disabled_region, 1330 test_kernel_gup_of_access_disabled_region, 1331 test_kernel_gup_write_to_write_disabled_region, 1332 test_executing_on_unreadable_memory, 1333 test_ptrace_of_child, 1334 test_pkey_syscalls_on_non_allocated_pkey, 1335 test_pkey_syscalls_bad_args, 1336 test_pkey_alloc_exhaust, 1337 }; 1338 1339 void run_tests_once(void) 1340 { 1341 int *ptr; 1342 int prot = PROT_READ|PROT_WRITE; 1343 1344 for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { 1345 int pkey; 1346 int orig_pkru_faults = pkru_faults; 1347 1348 dprintf1("======================\n"); 1349 dprintf1("test %d preparing...\n", test_nr); 1350 1351 tracing_on(); 1352 pkey = alloc_random_pkey(); 1353 dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); 1354 ptr = malloc_pkey(PAGE_SIZE, prot, pkey); 1355 dprintf1("test %d starting...\n", test_nr); 1356 pkey_tests[test_nr](ptr, pkey); 1357 dprintf1("freeing test memory: %p\n", ptr); 1358 free_pkey_malloc(ptr); 1359 sys_pkey_free(pkey); 1360 1361 dprintf1("pkru_faults: %d\n", pkru_faults); 1362 dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); 1363 1364 tracing_off(); 1365 close_test_fds(); 1366 1367 printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); 1368 dprintf1("======================\n\n"); 1369 } 1370 iteration_nr++; 1371 } 1372 1373 void pkey_setup_shadow(void) 1374 { 1375 shadow_pkru = __rdpkru(); 1376 } 1377 1378 int main(void) 1379 { 1380 int nr_iterations = 22; 1381 1382 setup_handlers(); 1383 1384 printf("has pku: %d\n", cpu_has_pku()); 1385 1386 if (!cpu_has_pku()) { 1387 int size = PAGE_SIZE; 1388 int *ptr; 1389 1390 printf("running PKEY tests for unsupported CPU/OS\n"); 1391 1392 ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); 1393 assert(ptr != (void *)-1); 1394 test_mprotect_pkey_on_unsupported_cpu(ptr, 1); 1395 exit(0); 1396 } 1397 1398 pkey_setup_shadow(); 1399 printf("startup pkru: %x\n", rdpkru()); 1400 setup_hugetlbfs(); 1401 1402 while (nr_iterations-- > 0) 1403 run_tests_once(); 1404 1405 printf("done (all tests OK)\n"); 1406 return 0; 1407 } 1408