#ifndef __ASM_PARAVIRT_H
#define __ASM_PARAVIRT_H
/* Various instructions on x86 need to be replaced for
 * para-virtualization: those hooks are defined here. */

#ifdef CONFIG_PARAVIRT
#include <asm/page.h>

/* Bitmask of what can be clobbered: usually at least eax. */
#define CLBR_NONE 0x0
#define CLBR_EAX 0x1
#define CLBR_ECX 0x2
#define CLBR_EDX 0x4
#define CLBR_ANY 0x7

#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <linux/cpumask.h>
#include <asm/kmap_types.h>

struct page;
struct thread_struct;
struct Xgt_desc_struct;
struct tss_struct;
struct mm_struct;
struct desc_struct;

/* general info */
struct pv_info {
	unsigned int kernel_rpl;
	int shared_kernel_pmd;
	int paravirt_enabled;
	const char *name;
};

struct pv_init_ops {
	/*
	 * Patch may replace one of the defined code sequences with
	 * arbitrary code, subject to the same register constraints.
	 * This generally means the code is not free to clobber any
	 * registers other than EAX.  The patch function should return
	 * the number of bytes of code generated, as we nop pad the
	 * rest in generic code.
	 */
	unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
			  unsigned long addr, unsigned len);

	/* Basic arch-specific setup */
	void (*arch_setup)(void);
	char *(*memory_setup)(void);
	void (*post_allocator_init)(void);

	/* Print a banner to identify the environment */
	void (*banner)(void);
};
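/*
 * Illustrative sketch only (example_patch is hypothetical, not part of
 * this interface): a minimal backend could implement the .patch hook by
 * deferring every site to the generic patcher declared further down in
 * this header, which emits a direct call/jmp or nops as appropriate:
 *
 *	static unsigned example_patch(u8 type, u16 clobbers, void *insnbuf,
 *				      unsigned long addr, unsigned len)
 *	{
 *		return paravirt_patch_default(type, clobbers, insnbuf,
 *					      addr, len);
 *	}
 */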

struct pv_lazy_ops {
	/* Set deferred update mode, used for batching operations. */
	void (*enter)(void);
	void (*leave)(void);
};

struct pv_time_ops {
	void (*time_init)(void);

	/* Get and set time of day */
	unsigned long (*get_wallclock)(void);
	int (*set_wallclock)(unsigned long);

	unsigned long long (*sched_clock)(void);
	unsigned long (*get_cpu_khz)(void);
};

struct pv_cpu_ops {
	/* hooks for various privileged instructions */
	unsigned long (*get_debugreg)(int regno);
	void (*set_debugreg)(int regno, unsigned long value);

	void (*clts)(void);

	unsigned long (*read_cr0)(void);
	void (*write_cr0)(unsigned long);

	unsigned long (*read_cr4_safe)(void);
	unsigned long (*read_cr4)(void);
	void (*write_cr4)(unsigned long);

	/* Segment descriptor handling */
	void (*load_tr_desc)(void);
	void (*load_gdt)(const struct Xgt_desc_struct *);
	void (*load_idt)(const struct Xgt_desc_struct *);
	void (*store_gdt)(struct Xgt_desc_struct *);
	void (*store_idt)(struct Xgt_desc_struct *);
	void (*set_ldt)(const void *desc, unsigned entries);
	unsigned long (*store_tr)(void);
	void (*load_tls)(struct thread_struct *t, unsigned int cpu);
	void (*write_ldt_entry)(struct desc_struct *,
				int entrynum, u32 low, u32 high);
	void (*write_gdt_entry)(struct desc_struct *,
				int entrynum, u32 low, u32 high);
	void (*write_idt_entry)(struct desc_struct *,
				int entrynum, u32 low, u32 high);
	void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t);

	void (*set_iopl_mask)(unsigned mask);

	void (*wbinvd)(void);
	void (*io_delay)(void);

	/* cpuid emulation, mostly so that caps bits can be disabled */
	void (*cpuid)(unsigned int *eax, unsigned int *ebx,
		      unsigned int *ecx, unsigned int *edx);

	/* MSR, PMC and TSC operations.
	   err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
	u64 (*read_msr)(unsigned int msr, int *err);
	int (*write_msr)(unsigned int msr, u64 val);

	u64 (*read_tsc)(void);
	u64 (*read_pmc)(void);

	/* These two are jmp'ed to, not actually called. */
	void (*irq_enable_sysexit)(void);
	void (*iret)(void);

	struct pv_lazy_ops lazy_mode;
};

struct pv_irq_ops {
	void (*init_IRQ)(void);

	/*
	 * Get/set interrupt state.  save_fl and restore_fl are only
	 * expected to use X86_EFLAGS_IF; all other bits
	 * returned from save_fl are undefined, and may be ignored by
	 * restore_fl.
	 */
	unsigned long (*save_fl)(void);
	void (*restore_fl)(unsigned long);
	void (*irq_disable)(void);
	void (*irq_enable)(void);
	void (*safe_halt)(void);
	void (*halt)(void);
};

struct pv_apic_ops {
#ifdef CONFIG_X86_LOCAL_APIC
	/*
	 * Direct APIC operations, principally for VMI.  Ideally
	 * these shouldn't be in this interface.
	 */
	void (*apic_write)(unsigned long reg, unsigned long v);
	void (*apic_write_atomic)(unsigned long reg, unsigned long v);
	unsigned long (*apic_read)(unsigned long reg);
	void (*setup_boot_clock)(void);
	void (*setup_secondary_clock)(void);

	void (*startup_ipi_hook)(int phys_apicid,
				 unsigned long start_eip,
				 unsigned long start_esp);
#endif
};

struct pv_mmu_ops {
	/*
	 * Called before/after init_mm pagetable setup. setup_start
	 * may reset %cr3, and may pre-install parts of the pagetable;
	 * pagetable setup is expected to preserve any existing
	 * mapping.
	 */
	void (*pagetable_setup_start)(pgd_t *pgd_base);
	void (*pagetable_setup_done)(pgd_t *pgd_base);

	unsigned long (*read_cr2)(void);
	void (*write_cr2)(unsigned long);

	unsigned long (*read_cr3)(void);
	void (*write_cr3)(unsigned long);

	/*
	 * Hooks for intercepting the creation/use/destruction of an
	 * mm_struct.
	 */
	void (*activate_mm)(struct mm_struct *prev,
			    struct mm_struct *next);
	void (*dup_mmap)(struct mm_struct *oldmm,
			 struct mm_struct *mm);
	void (*exit_mmap)(struct mm_struct *mm);


	/* TLB operations */
	void (*flush_tlb_user)(void);
	void (*flush_tlb_kernel)(void);
	void (*flush_tlb_single)(unsigned long addr);
	void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm,
				 unsigned long va);

	/* Hooks for allocating/releasing pagetable pages */
	void (*alloc_pt)(struct mm_struct *mm, u32 pfn);
	void (*alloc_pd)(u32 pfn);
	void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count);
	void (*release_pt)(u32 pfn);
	void (*release_pd)(u32 pfn);

	/* Pagetable manipulation functions */
	void (*set_pte)(pte_t *ptep, pte_t pteval);
	void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, pte_t pteval);
	void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
	void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
	void (*pte_update_defer)(struct mm_struct *mm,
				 unsigned long addr, pte_t *ptep);

#ifdef CONFIG_X86_PAE
	void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
	void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
				pte_t *ptep, pte_t pte);
	void (*set_pud)(pud_t *pudp, pud_t pudval);
	void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
	void (*pmd_clear)(pmd_t *pmdp);

	unsigned long long (*pte_val)(pte_t);
	unsigned long long (*pmd_val)(pmd_t);
	unsigned long long (*pgd_val)(pgd_t);

	pte_t (*make_pte)(unsigned long long pte);
	pmd_t (*make_pmd)(unsigned long long pmd);
	pgd_t (*make_pgd)(unsigned long long pgd);
#else
	unsigned long (*pte_val)(pte_t);
	unsigned long (*pgd_val)(pgd_t);

	pte_t (*make_pte)(unsigned long pte);
	pgd_t (*make_pgd)(unsigned long pgd);
#endif

#ifdef CONFIG_HIGHPTE
	void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
#endif

	struct pv_lazy_ops lazy_mode;
};

/* This contains all the paravirt structures: we get a convenient
 * number for each function using the offset which we use to indicate
 * what to patch. */
struct paravirt_patch_template
{
	struct pv_init_ops pv_init_ops;
	struct pv_time_ops pv_time_ops;
	struct pv_cpu_ops pv_cpu_ops;
	struct pv_irq_ops pv_irq_ops;
	struct pv_apic_ops pv_apic_ops;
	struct pv_mmu_ops pv_mmu_ops;
};

extern struct pv_info pv_info;
extern struct pv_init_ops pv_init_ops;
extern struct pv_time_ops pv_time_ops;
extern struct pv_cpu_ops pv_cpu_ops;
extern struct pv_irq_ops pv_irq_ops;
extern struct pv_apic_ops pv_apic_ops;
extern struct pv_mmu_ops pv_mmu_ops;

#define PARAVIRT_PATCH(x)					\
	(offsetof(struct paravirt_patch_template, x) / sizeof(void *))

#define paravirt_type(op)				\
	[paravirt_typenum] "i" (PARAVIRT_PATCH(op)),	\
	[paravirt_opptr] "m" (op)
#define paravirt_clobber(clobber)		\
	[paravirt_clobber] "i" (clobber)
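/*
 * Worked example (illustrative only): for the iret hook,
 *
 *	PARAVIRT_PATCH(pv_cpu_ops.iret)
 *	    == offsetof(struct paravirt_patch_template, pv_cpu_ops.iret)
 *	       / sizeof(void *)
 *
 * i.e. the index of that function pointer within the template.  The
 * patcher can multiply the type number back by sizeof(void *) to
 * recover the structure offset of the op being patched.
 */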
/*
 * Generate some code, and mark it as patchable by the
 * apply_paravirt() alternate instruction patcher.
 */
#define _paravirt_alt(insn_string, type, clobber)	\
	"771:\n\t" insn_string "\n" "772:\n"		\
	".pushsection .parainstructions,\"a\"\n"	\
	" .long 771b\n"					\
	" .byte " type "\n"				\
	" .byte 772b-771b\n"				\
	" .short " clobber "\n"				\
	".popsection\n"

/* Generate patchable code, with the default asm parameters. */
#define paravirt_alt(insn_string)					\
	_paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")

unsigned paravirt_patch_nop(void);
unsigned paravirt_patch_ignore(unsigned len);
unsigned paravirt_patch_call(void *insnbuf,
			     const void *target, u16 tgt_clobbers,
			     unsigned long addr, u16 site_clobbers,
			     unsigned len);
unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
			    unsigned long addr, unsigned len);
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
				unsigned long addr, unsigned len);

unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
			      const char *start, const char *end);

int paravirt_disable_iospace(void);

/*
 * This generates an indirect call based on the operation type number.
 * The type number, computed in PARAVIRT_PATCH, is derived from the
 * offset into the paravirt_patch_template structure, and can therefore be
 * freely converted back into a structure offset.
 */
#define PARAVIRT_CALL	"call *%[paravirt_opptr];"
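/*
 * Rough sketch of what paravirt_alt(PARAVIRT_CALL) expands to (for
 * illustration only; exact spacing and operand substitution are up to
 * gcc/gas):
 *
 *	771:	call *%[paravirt_opptr]
 *	772:
 *	.pushsection .parainstructions, "a"
 *	 .long 771b			# address of the call site
 *	 .byte %c[paravirt_typenum]	# PARAVIRT_PATCH() type number
 *	 .byte 772b-771b		# length available for patching
 *	 .short %c[paravirt_clobber]	# CLBR_* mask
 *	.popsection
 *
 * Each such record matches struct paravirt_patch_site defined near the
 * end of this header.
 */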
/*
 * These macros are intended to wrap calls through one of the paravirt
 * ops structs, so that they can be later identified and patched at
 * runtime.
 *
 * Normally, a call to a pv_op function is a simple indirect call:
 * (paravirt_ops.operations)(args...).
 *
 * Unfortunately, this is a relatively slow operation for modern CPUs,
 * because it cannot necessarily determine what the destination
 * address is.  In this case, the address is a runtime constant, so at
 * the very least we can patch the call to be a simple direct call, or
 * ideally, patch an inline implementation into the callsite.  (Direct
 * calls are essentially free, because the call and return addresses
 * are completely predictable.)
 *
 * These macros rely on the standard gcc "regparm(3)" calling
 * convention, in which the first three arguments are placed in %eax,
 * %edx, %ecx (in that order), and the remaining arguments are placed
 * on the stack.  All caller-save registers (eax,edx,ecx) are expected
 * to be modified (either clobbered or used for return values).
 *
 * The call instruction itself is marked by placing its start address
 * and size into the .parainstructions section, so that
 * apply_paravirt() in arch/i386/kernel/alternative.c can do the
 * appropriate patching under the control of the backend pv_init_ops
 * implementation.
 *
 * Unfortunately there's no way to get gcc to generate the args setup
 * for the call, and then allow the call itself to be generated by an
 * inline asm.  Because of this, we must do the complete arg setup and
 * return value handling from within these macros.  This is fairly
 * cumbersome.
 *
 * There are 5 sets of PVOP_* macros for dealing with 0-4 arguments.
 * It could be extended to more arguments, but there would be little
 * to be gained from that.  For each number of arguments, there are
 * the two VCALL and CALL variants for void and non-void functions.
 *
 * When there is a return value, the invoker of the macro must specify
 * the return type.  The macro then uses sizeof() on that type to
 * determine whether it's a 32 or 64 bit value, and places the return
 * in the right register(s) (just %eax for 32-bit, and %edx:%eax for
 * 64-bit).
 *
 * 64-bit arguments are passed as a pair of adjacent 32-bit arguments
 * in low,high order.
 *
 * Small structures are passed and returned in registers.  The macro
 * calling convention can't directly deal with this, so the wrapper
 * functions must do this.
 *
 * These PVOP_* macros are only defined within this header.  This
 * means that all uses must be wrapped in inline functions.  This also
 * makes sure the incoming and outgoing types are always correct.
 */
#define __PVOP_CALL(rettype, op, pre, post, ...)			\
	({								\
		rettype __ret;						\
		unsigned long __eax, __edx, __ecx;			\
		if (sizeof(rettype) > sizeof(unsigned long)) {		\
			asm volatile(pre				\
				     paravirt_alt(PARAVIRT_CALL)	\
				     post				\
				     : "=a" (__eax), "=d" (__edx),	\
				       "=c" (__ecx)			\
				     : paravirt_type(op),		\
				       paravirt_clobber(CLBR_ANY),	\
				       ##__VA_ARGS__			\
				     : "memory", "cc");			\
			__ret = (rettype)((((u64)__edx) << 32) | __eax); \
		} else {						\
			asm volatile(pre				\
				     paravirt_alt(PARAVIRT_CALL)	\
				     post				\
				     : "=a" (__eax), "=d" (__edx),	\
				       "=c" (__ecx)			\
				     : paravirt_type(op),		\
				       paravirt_clobber(CLBR_ANY),	\
				       ##__VA_ARGS__			\
				     : "memory", "cc");			\
			__ret = (rettype)__eax;				\
		}							\
		__ret;							\
	})
#define __PVOP_VCALL(op, pre, post, ...)				\
	({								\
		unsigned long __eax, __edx, __ecx;			\
		asm volatile(pre					\
			     paravirt_alt(PARAVIRT_CALL)		\
			     post					\
			     : "=a" (__eax), "=d" (__edx), "=c" (__ecx) \
			     : paravirt_type(op),			\
			       paravirt_clobber(CLBR_ANY),		\
			       ##__VA_ARGS__				\
			     : "memory", "cc");				\
	})

#define PVOP_CALL0(rettype, op)						\
	__PVOP_CALL(rettype, op, "", "")
#define PVOP_VCALL0(op)							\
	__PVOP_VCALL(op, "", "")

#define PVOP_CALL1(rettype, op, arg1)					\
	__PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)))
#define PVOP_VCALL1(op, arg1)						\
	__PVOP_VCALL(op, "", "", "0" ((u32)(arg1)))

#define PVOP_CALL2(rettype, op, arg1, arg2)				\
	__PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)))
#define PVOP_VCALL2(op, arg1, arg2)					\
	__PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)))

#define PVOP_CALL3(rettype, op, arg1, arg2, arg3)			\
	__PVOP_CALL(rettype, op, "", "", "0" ((u32)(arg1)),		\
		    "1" ((u32)(arg2)), "2" ((u32)(arg3)))
#define PVOP_VCALL3(op, arg1, arg2, arg3)				\
	__PVOP_VCALL(op, "", "", "0" ((u32)(arg1)), "1" ((u32)(arg2)),	\
		     "2" ((u32)(arg3)))

#define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4)			\
	__PVOP_CALL(rettype, op,					\
		    "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
		    "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
		    "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
#define PVOP_VCALL4(op, arg1, arg2, arg3, arg4)				\
	__PVOP_VCALL(op,						\
		     "push %[_arg4];", "lea 4(%%esp),%%esp;",		\
		     "0" ((u32)(arg1)), "1" ((u32)(arg2)),		\
		     "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4)))
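/*
 * Usage sketch (example_op is hypothetical, purely for illustration):
 * a wrapper for a two-argument op returning a 64-bit value is written
 * as an inline function, exactly like the real wrappers below:
 *
 *	static inline u64 pv_example_op(u32 a, u32 b)
 *	{
 *		return PVOP_CALL2(u64, pv_cpu_ops.example_op, a, b);
 *	}
 *
 * Since sizeof(u64) > sizeof(unsigned long), __PVOP_CALL reassembles
 * the result from the %edx:%eax pair described above.
 */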
static inline int paravirt_enabled(void)
{
	return pv_info.paravirt_enabled;
}

static inline void load_esp0(struct tss_struct *tss,
			     struct thread_struct *thread)
{
	PVOP_VCALL2(pv_cpu_ops.load_esp0, tss, thread);
}

#define ARCH_SETUP			pv_init_ops.arch_setup();

static inline unsigned long get_wallclock(void)
{
	return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock);
}

static inline int set_wallclock(unsigned long nowtime)
{
	return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime);
}

static inline void (*choose_time_init(void))(void)
{
	return pv_time_ops.time_init;
}

/* The paravirtualized CPUID instruction. */
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
			   unsigned int *ecx, unsigned int *edx)
{
	PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
}

/*
 * These special macros can be used to get or set a debugging register
 */
static inline unsigned long paravirt_get_debugreg(int reg)
{
	return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
}
#define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
static inline void set_debugreg(unsigned long val, int reg)
{
	PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
}

static inline void clts(void)
{
	PVOP_VCALL0(pv_cpu_ops.clts);
}

static inline unsigned long read_cr0(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
}

static inline void write_cr0(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
}

static inline unsigned long read_cr2(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
}

static inline void write_cr2(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
}

static inline unsigned long read_cr3(void)
{
	return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
}

static inline void write_cr3(unsigned long x)
{
	PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
}

static inline unsigned long read_cr4(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
}
static inline unsigned long read_cr4_safe(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
}

static inline void write_cr4(unsigned long x)
{
	PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
}

static inline void raw_safe_halt(void)
{
	PVOP_VCALL0(pv_irq_ops.safe_halt);
}

static inline void halt(void)
{
	PVOP_VCALL0(pv_irq_ops.halt);
}

static inline void wbinvd(void)
{
	PVOP_VCALL0(pv_cpu_ops.wbinvd);
}

#define get_kernel_rpl()  (pv_info.kernel_rpl)

static inline u64 paravirt_read_msr(unsigned msr, int *err)
{
	return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
}
static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
{
	return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
}

/* These should all do BUG_ON(_err), but our headers are too tangled. */
#define rdmsr(msr,val1,val2) do {		\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	val1 = (u32)_l;				\
	val2 = _l >> 32;			\
} while(0)

#define wrmsr(msr,val1,val2) do {		\
	paravirt_write_msr(msr, val1, val2);	\
} while(0)

#define rdmsrl(msr,val) do {			\
	int _err;				\
	val = paravirt_read_msr(msr, &_err);	\
} while(0)

#define wrmsrl(msr,val)		wrmsr(msr, (u32)((u64)(val)), ((u64)(val))>>32)
#define wrmsr_safe(msr,a,b)	paravirt_write_msr(msr, a, b)

/* rdmsr with exception handling */
#define rdmsr_safe(msr,a,b) ({			\
	int _err;				\
	u64 _l = paravirt_read_msr(msr, &_err);	\
	(*a) = (u32)_l;				\
	(*b) = _l >> 32;			\
	_err; })


static inline u64 paravirt_read_tsc(void)
{
	return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
}

#define rdtscl(low) do {			\
	u64 _l = paravirt_read_tsc();		\
	low = (int)_l;				\
} while(0)

#define rdtscll(val) (val = paravirt_read_tsc())

static inline unsigned long long paravirt_sched_clock(void)
{
	return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
}
#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz())

#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)

static inline unsigned long long paravirt_read_pmc(int counter)
{
	return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
}

#define rdpmc(counter,low,high) do {		\
	u64 _l = paravirt_read_pmc(counter);	\
	low = (u32)_l;				\
	high = _l >> 32;			\
} while(0)

static inline void load_TR_desc(void)
{
	PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
}
static inline void load_gdt(const struct Xgt_desc_struct *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
}
static inline void load_idt(const struct Xgt_desc_struct *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
}
static inline void set_ldt(const void *addr, unsigned entries)
{
	PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
}
static inline void store_gdt(struct Xgt_desc_struct *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
}
static inline void store_idt(struct Xgt_desc_struct *dtr)
{
	PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
}
static inline unsigned long paravirt_store_tr(void)
{
	return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
}
#define store_tr(tr)	((tr) = paravirt_store_tr())
static inline void load_TLS(struct thread_struct *t, unsigned cpu)
{
	PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
}
static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high)
{
	PVOP_VCALL4(pv_cpu_ops.write_ldt_entry, dt, entry, low, high);
}
static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high)
{
	PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, low, high);
}
static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high)
{
	PVOP_VCALL4(pv_cpu_ops.write_idt_entry, dt, entry, low, high);
}
static inline void set_iopl_mask(unsigned mask)
{
	PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
}

/* The paravirtualized I/O functions */
static inline void slow_down_io(void)
{
	pv_cpu_ops.io_delay();
#ifdef REALLY_SLOW_IO
	pv_cpu_ops.io_delay();
	pv_cpu_ops.io_delay();
	pv_cpu_ops.io_delay();
#endif
}

#ifdef CONFIG_X86_LOCAL_APIC
/*
 * Basic functions accessing APICs.
 */
static inline void apic_write(unsigned long reg, unsigned long v)
{
	PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
}

static inline void apic_write_atomic(unsigned long reg, unsigned long v)
{
	PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
}

static inline unsigned long apic_read(unsigned long reg)
{
	return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
}

static inline void setup_boot_clock(void)
{
	PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
}

static inline void setup_secondary_clock(void)
{
	PVOP_VCALL0(pv_apic_ops.setup_secondary_clock);
}
#endif

static inline void paravirt_post_allocator_init(void)
{
	if (pv_init_ops.post_allocator_init)
		(*pv_init_ops.post_allocator_init)();
}

static inline void paravirt_pagetable_setup_start(pgd_t *base)
{
	(*pv_mmu_ops.pagetable_setup_start)(base);
}

static inline void paravirt_pagetable_setup_done(pgd_t *base)
{
	(*pv_mmu_ops.pagetable_setup_done)(base);
}

#ifdef CONFIG_SMP
static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
				    unsigned long start_esp)
{
	PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
		    phys_apicid, start_eip, start_esp);
}
#endif

static inline void paravirt_activate_mm(struct mm_struct *prev,
					struct mm_struct *next)
{
	PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
}

static inline void arch_dup_mmap(struct mm_struct *oldmm,
				 struct mm_struct *mm)
{
	PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
}

static inline void arch_exit_mmap(struct mm_struct *mm)
{
	PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
}

static inline void __flush_tlb(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
}
static inline void __flush_tlb_global(void)
{
	PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
}
static inline void __flush_tlb_single(unsigned long addr)
{
	PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
}

static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
				    unsigned long va)
{
	PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
}

static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn)
{
	PVOP_VCALL2(pv_mmu_ops.alloc_pt, mm, pfn);
}
static inline void paravirt_release_pt(unsigned pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pt, pfn);
}

static inline void paravirt_alloc_pd(unsigned pfn)
{
	PVOP_VCALL1(pv_mmu_ops.alloc_pd, pfn);
}

static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn,
					   unsigned start, unsigned count)
{
	PVOP_VCALL4(pv_mmu_ops.alloc_pd_clone, pfn, clonepfn, start, count);
}
static inline void paravirt_release_pd(unsigned pfn)
{
	PVOP_VCALL1(pv_mmu_ops.release_pd, pfn);
}

#ifdef CONFIG_HIGHPTE
static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
{
	unsigned long ret;
	ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type);
	return (void *)ret;
}
#endif

static inline void pte_update(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
}
static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
}

#ifdef CONFIG_X86_PAE
static inline pte_t __pte(unsigned long long val)
{
	unsigned long long ret = PVOP_CALL2(unsigned long long,
					    pv_mmu_ops.make_pte,
					    val, val >> 32);
	return (pte_t) { ret, ret >> 32 };
}

static inline pmd_t __pmd(unsigned long long val)
{
	return (pmd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pmd,
				    val, val >> 32) };
}

static inline pgd_t __pgd(unsigned long long val)
{
	return (pgd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pgd,
				    val, val >> 32) };
}

static inline unsigned long long pte_val(pte_t x)
{
	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pte_val,
			  x.pte_low, x.pte_high);
}

static inline unsigned long long pmd_val(pmd_t x)
{
	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pmd_val,
			  x.pmd, x.pmd >> 32);
}

static inline unsigned long long pgd_val(pgd_t x)
{
	return PVOP_CALL2(unsigned long long, pv_mmu_ops.pgd_val,
			  x.pgd, x.pgd >> 32);
}

static inline void set_pte(pte_t *ptep, pte_t pteval)
{
	PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, pteval.pte_low, pteval.pte_high);
}

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pteval)
{
	/* 5 arg words */
	pv_mmu_ops.set_pte_at(mm, addr, ptep, pteval);
}

static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
{
	PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
		    pteval.pte_low, pteval.pte_high);
}

static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, pte_t pte)
{
	/* 5 arg words */
	pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp,
		    pmdval.pmd, pmdval.pmd >> 32);
}

static inline void set_pud(pud_t *pudp, pud_t pudval)
{
	PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
		    pudval.pgd.pgd, pudval.pgd.pgd >> 32);
}

static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
}

static inline void pmd_clear(pmd_t *pmdp)
{
	PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
}

#else  /* !CONFIG_X86_PAE */

static inline pte_t __pte(unsigned long val)
{
	return (pte_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pte, val) };
}

static inline pgd_t __pgd(unsigned long val)
{
	return (pgd_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pgd, val) };
}

static inline unsigned long pte_val(pte_t x)
{
	return PVOP_CALL1(unsigned long, pv_mmu_ops.pte_val, x.pte_low);
}

static inline unsigned long pgd_val(pgd_t x)
{
	return PVOP_CALL1(unsigned long, pv_mmu_ops.pgd_val, x.pgd);
}

static inline void set_pte(pte_t *ptep, pte_t pteval)
{
	PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, pteval.pte_low);
}

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pteval)
{
	PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pteval.pte_low);
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, pmdval.pud.pgd.pgd);
}
#endif	/* CONFIG_X86_PAE */
/* Lazy mode for batching updates / context switch */
enum paravirt_lazy_mode {
	PARAVIRT_LAZY_NONE,
	PARAVIRT_LAZY_MMU,
	PARAVIRT_LAZY_CPU,
};

enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
void paravirt_enter_lazy_cpu(void);
void paravirt_leave_lazy_cpu(void);
void paravirt_enter_lazy_mmu(void);
void paravirt_leave_lazy_mmu(void);
void paravirt_leave_lazy(enum paravirt_lazy_mode mode);

#define __HAVE_ARCH_ENTER_LAZY_CPU_MODE
static inline void arch_enter_lazy_cpu_mode(void)
{
	PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
}

static inline void arch_leave_lazy_cpu_mode(void)
{
	PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
}

static inline void arch_flush_lazy_cpu_mode(void)
{
	if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
		arch_leave_lazy_cpu_mode();
		arch_enter_lazy_cpu_mode();
	}
}


#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
static inline void arch_enter_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter);
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
}

static inline void arch_flush_lazy_mmu_mode(void)
{
	if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
		arch_leave_lazy_mmu_mode();
		arch_enter_lazy_mmu_mode();
	}
}
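/*
 * Usage sketch (illustrative only): callers bracket a run of pagetable
 * updates so that a batching backend can defer and coalesce them, e.g.
 *
 *	arch_enter_lazy_mmu_mode();
 *	for (addr = start; addr != end; addr += PAGE_SIZE, ptep++)
 *		set_pte_at(mm, addr, ptep, pteval);
 *	arch_leave_lazy_mmu_mode();
 *
 * The arch_flush_lazy_*_mode() helpers above push out any pending
 * batched operations by briefly leaving and re-entering the mode.
 */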
void _paravirt_nop(void);
#define paravirt_nop	((void *)_paravirt_nop)

/* These all sit in the .parainstructions section to tell us what to patch. */
struct paravirt_patch_site {
	u8 *instr;		/* original instructions */
	u8 instrtype;		/* type of this instruction */
	u8 len;			/* length of original instruction */
	u16 clobbers;		/* what registers you may clobber */
};

extern struct paravirt_patch_site __parainstructions[],
	__parainstructions_end[];

static inline unsigned long __raw_local_save_flags(void)
{
	unsigned long f;

	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
				  PARAVIRT_CALL
				  "popl %%edx; popl %%ecx")
		     : "=a"(f)
		     : paravirt_type(pv_irq_ops.save_fl),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "cc");
	return f;
}

static inline void raw_local_irq_restore(unsigned long f)
{
	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
				  PARAVIRT_CALL
				  "popl %%edx; popl %%ecx")
		     : "=a"(f)
		     : "0"(f),
		       paravirt_type(pv_irq_ops.restore_fl),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "cc");
}

static inline void raw_local_irq_disable(void)
{
	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
				  PARAVIRT_CALL
				  "popl %%edx; popl %%ecx")
		     :
		     : paravirt_type(pv_irq_ops.irq_disable),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "eax", "cc");
}

static inline void raw_local_irq_enable(void)
{
	asm volatile(paravirt_alt("pushl %%ecx; pushl %%edx;"
				  PARAVIRT_CALL
				  "popl %%edx; popl %%ecx")
		     :
		     : paravirt_type(pv_irq_ops.irq_enable),
		       paravirt_clobber(CLBR_EAX)
		     : "memory", "eax", "cc");
}

static inline unsigned long __raw_local_irq_save(void)
{
	unsigned long f;

	f = __raw_local_save_flags();
	raw_local_irq_disable();
	return f;
}

#define CLI_STRING							\
	_paravirt_alt("pushl %%ecx; pushl %%edx;"			\
		      "call *%[paravirt_cli_opptr];"			\
		      "popl %%edx; popl %%ecx",				\
		      "%c[paravirt_cli_type]", "%c[paravirt_clobber]")

#define STI_STRING							\
	_paravirt_alt("pushl %%ecx; pushl %%edx;"			\
		      "call *%[paravirt_sti_opptr];"			\
		      "popl %%edx; popl %%ecx",				\
		      "%c[paravirt_sti_type]", "%c[paravirt_clobber]")

#define CLI_STI_CLOBBERS , "%eax"
#define CLI_STI_INPUT_ARGS						\
	,								\
	[paravirt_cli_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_disable)), \
	[paravirt_cli_opptr] "m" (pv_irq_ops.irq_disable),		\
	[paravirt_sti_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_enable)), \
	[paravirt_sti_opptr] "m" (pv_irq_ops.irq_enable),		\
	paravirt_clobber(CLBR_EAX)

/* Make sure as little as possible of this mess escapes. */
#undef PARAVIRT_CALL
#undef __PVOP_CALL
#undef __PVOP_VCALL
#undef PVOP_VCALL0
#undef PVOP_CALL0
#undef PVOP_VCALL1
#undef PVOP_CALL1
#undef PVOP_VCALL2
#undef PVOP_CALL2
#undef PVOP_VCALL3
#undef PVOP_CALL3
#undef PVOP_VCALL4
#undef PVOP_CALL4

#else  /* __ASSEMBLY__ */

#define PARA_PATCH(struct, off)	((PARAVIRT_PATCH_##struct + (off)) / 4)

#define PARA_SITE(ptype, clobbers, ops)		\
771:;						\
	ops;					\
772:;						\
	.pushsection .parainstructions,"a";	\
	 .long 771b;				\
	 .byte ptype;				\
	 .byte 772b-771b;			\
	 .short clobbers;			\
	.popsection

#define INTERRUPT_RETURN						\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,	\
		  jmp *%cs:pv_cpu_ops+PV_CPU_iret)

#define DISABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers,	\
		  pushl %eax; pushl %ecx; pushl %edx;			\
		  call *%cs:pv_irq_ops+PV_IRQ_irq_disable;		\
		  popl %edx; popl %ecx; popl %eax)

#define ENABLE_INTERRUPTS(clobbers)					\
	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,	\
		  pushl %eax; pushl %ecx; pushl %edx;			\
		  call *%cs:pv_irq_ops+PV_IRQ_irq_enable;		\
		  popl %edx; popl %ecx; popl %eax)

#define ENABLE_INTERRUPTS_SYSEXIT					\
	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), CLBR_NONE,	\
		  jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_sysexit)

#define GET_CR0_INTO_EAX			\
	push %ecx; push %edx;			\
	call *pv_cpu_ops+PV_CPU_read_cr0;	\
	pop %edx; pop %ecx

#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PARAVIRT */
#endif /* __ASM_PARAVIRT_H */