/*
 * QEMU KVM support
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori (at) us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

/* Neutralize the kernel header's __user annotation for userspace builds. */
#undef __user
#define __user /* nothing */
#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "cpu.h"
#include "exec/gdbstub.h"

#ifdef CONFIG_KVM_GS_RESTORE
#include "kvm-gs-restore.h"
#endif

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#ifdef KVM_CAP_EXT_CPUID

static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
{
    struct kvm_cpuid2 *cpuid;
    int r, size;

    size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
    cpuid = (struct kvm_cpuid2 *)g_malloc0(size);
    cpuid->nent = max;
    r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
    if (r == 0 && cpuid->nent >= max) {
        /* A completely full buffer may have been truncated; retry bigger. */
        r = -E2BIG;
    }
    if (r < 0) {
        if (r == -E2BIG) {
            g_free(cpuid);
            return NULL;
        } else {
            fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
                    strerror(-r));
            exit(1);
        }
    }
    return cpuid;
}

uint32_t kvm_arch_get_supported_cpuid(CPUState *cpu, uint32_t function, int reg)
{
    struct kvm_cpuid2 *cpuid;
    int i, max;
    uint32_t ret = 0;
    uint32_t cpuid_1_edx;

    if (!kvm_check_extension(cpu->kvm_state, KVM_CAP_EXT_CPUID)) {
        return -1U;
    }

    /* Double the entry count until the whole table fits. */
    max = 1;
    while ((cpuid = try_get_cpuid(cpu->kvm_state, max)) == NULL) {
        max *= 2;
    }

    for (i = 0; i < cpuid->nent; ++i) {
        if (cpuid->entries[i].function == function) {
            switch (reg) {
            case R_EAX:
                ret = cpuid->entries[i].eax;
                break;
            case R_EBX:
                ret = cpuid->entries[i].ebx;
                break;
            case R_ECX:
                ret = cpuid->entries[i].ecx;
                break;
            case R_EDX:
                ret = cpuid->entries[i].edx;
                if (function == 0x80000001) {
                    /* On Intel, kvm returns cpuid according to the Intel spec,
                     * so add missing bits according to the AMD spec:
                     */
                    cpuid_1_edx = kvm_arch_get_supported_cpuid(cpu, 1, R_EDX);
                    ret |= cpuid_1_edx & 0xdfeff7ff;
                }
                break;
            }
        }
    }

    g_free(cpuid);

    return ret;
}

#else

uint32_t kvm_arch_get_supported_cpuid(CPUState *cpu, uint32_t function, int reg)
{
    return -1U;
}

#endif

#ifndef KVM_MP_STATE_RUNNABLE
#define KVM_MP_STATE_RUNNABLE 0
#endif
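
/*
 * Build the CPUID table that is handed to the kernel with KVM_SET_CPUID2.
 * Most leaves need a single entry, but two cases need special handling:
 * leaf 2 is "stateful" (it must be read EAX[7:0] times in a row), and
 * leaves 4, 0xb and 0xd take ECX as an index, so one entry is emitted per
 * index until the leaf reports that it is exhausted.
 */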
int kvm_arch_init_vcpu(CPUState *cpu)
{
    struct {
        struct kvm_cpuid2 cpuid;
        struct kvm_cpuid_entry2 entries[100];
    } __attribute__((packed)) cpuid_data;
    uint32_t limit, i, j, cpuid_i;
    uint32_t unused;
    CPUX86State *env = cpu->env_ptr;

    env->mp_state = KVM_MP_STATE_RUNNABLE;

    cpuid_i = 0;

    cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);

    for (i = 0; i <= limit; i++) {
        struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];

        switch (i) {
        case 2: {
            /* Keep reading function 2 until all the input is received */
            int times;

            c->function = i;
            c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
                       KVM_CPUID_FLAG_STATE_READ_NEXT;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            times = c->eax & 0xff;

            for (j = 1; j < times; ++j) {
                c = &cpuid_data.entries[cpuid_i++];
                c->function = i;
                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
                cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            }
            break;
        }
        case 4:
        case 0xb:
        case 0xd:
            for (j = 0; ; j++) {
                c->function = i;
                /* "SIGNIFCANT" is misspelled in the kernel header itself. */
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                c->index = j;
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);

                if (i == 4 && c->eax == 0)
                    break;
                if (i == 0xb && !(c->ecx & 0xff00))
                    break;
                if (i == 0xd && c->eax == 0)
                    break;

                c = &cpuid_data.entries[cpuid_i++];
            }
            break;
        default:
            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            break;
        }
    }
    cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);

    for (i = 0x80000000; i <= limit; i++) {
        struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];

        c->function = i;
        c->flags = 0;
        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
    }

    cpuid_data.cpuid.nent = cpuid_i;

    return kvm_vcpu_ioctl(cpu, KVM_SET_CPUID2, &cpuid_data);
}

static int kvm_has_msr_star(CPUX86State *env)
{
    static int has_msr_star;
    int ret;
    CPUState *cpu = ENV_GET_CPU(env);

    /* first time */
    if (has_msr_star == 0) {
        struct kvm_msr_list msr_list, *kvm_msr_list;

        has_msr_star = -1;

        /* Obtain MSR list from KVM.  These are the MSRs that we must
         * save/restore.  The first call, with nmsrs == 0, is expected to
         * fail with E2BIG; it tells us how large the real list is. */
        msr_list.nmsrs = 0;
        ret = kvm_ioctl(cpu->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
        if (ret < 0 && ret != -E2BIG)
            return 0;

        kvm_msr_list = g_malloc0(sizeof(msr_list) +
                                 msr_list.nmsrs * sizeof(msr_list.indices[0]));

        kvm_msr_list->nmsrs = msr_list.nmsrs;
        ret = kvm_ioctl(cpu->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
        if (ret >= 0) {
            int i;

            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                if (kvm_msr_list->indices[i] == MSR_STAR) {
                    has_msr_star = 1;
                    break;
                }
            }
        }

        g_free(kvm_msr_list);
    }

    return has_msr_star == 1;
}

int kvm_arch_init(KVMState *s, int smp_cpus)
{
    int ret;

    /* Create the vm86 TSS.  KVM uses vm86 mode to emulate 16-bit code
     * directly.  In order to use vm86 mode, a TSS is needed.  Since this
     * must be part of guest physical memory, we need to allocate it.  Older
     * versions of KVM just assumed that it would be at the end of physical
     * memory, but that doesn't work with more than 4GB of memory.  We simply
     * refuse to work with those older versions of KVM. */
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
    if (ret <= 0) {
        fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
        return ret;
    }

    /* This address is 3 pages before the bios, and the bios should be
     * presented as unavailable memory.  FIXME: need to ensure the e820 map
     * deals with this?
     */
    return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
}
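
/*
 * Segment state crosses the QEMU/KVM boundary in two shapes: QEMU's
 * SegmentCache keeps the descriptor attributes packed in a single flags
 * word, while struct kvm_segment has one field per attribute.  set_seg()
 * and get_seg() convert between the two; set_v8086_seg() instead forces
 * the fixed attributes that vm86-mode segments always have.
 */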
static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->db = 0;
    lhs->s = 1;
    lhs->l = 0;
    lhs->g = 0;
    lhs->avl = 0;
    lhs->unusable = 0;
}

static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;

    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = rhs->selector & 3;
    lhs->db = (flags >> DESC_B_SHIFT) & 1;
    lhs->s = (flags & DESC_S_MASK) != 0;
    lhs->l = (flags >> DESC_L_SHIFT) & 1;
    lhs->g = (flags & DESC_G_MASK) != 0;
    lhs->avl = (flags & DESC_AVL_MASK) != 0;
    lhs->unusable = 0;
}

static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags =
        (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->present * DESC_P_MASK)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->db << DESC_B_SHIFT)
        | (rhs->s * DESC_S_MASK)
        | (rhs->l << DESC_L_SHIFT)
        | (rhs->g * DESC_G_MASK)
        | (rhs->avl * DESC_AVL_MASK);
}

static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
{
    if (set)
        *kvm_reg = *qemu_reg;
    else
        *qemu_reg = *kvm_reg;
}

static int kvm_getput_regs(CPUX86State *env, int set)
{
    struct kvm_regs regs;
    int ret = 0;

    if (!set) {
        ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_GET_REGS, &regs);
        if (ret < 0)
            return ret;
    }

    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    kvm_getput_reg(&regs.r8, &env->regs[8], set);
    kvm_getput_reg(&regs.r9, &env->regs[9], set);
    kvm_getput_reg(&regs.r10, &env->regs[10], set);
    kvm_getput_reg(&regs.r11, &env->regs[11], set);
    kvm_getput_reg(&regs.r12, &env->regs[12], set);
    kvm_getput_reg(&regs.r13, &env->regs[13], set);
    kvm_getput_reg(&regs.r14, &env->regs[14], set);
    kvm_getput_reg(&regs.r15, &env->regs[15], set);
#endif

    kvm_getput_reg(&regs.rflags, &env->eflags, set);
    kvm_getput_reg(&regs.rip, &env->eip, set);

    if (set)
        ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_REGS, &regs);

    return ret;
}

static int kvm_put_fpu(CPUX86State *env)
{
    struct kvm_fpu fpu;
    int i;

    memset(&fpu, 0, sizeof fpu);
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;
    for (i = 0; i < 8; ++i)
        fpu.ftwx |= (!env->fptags[i]) << i;
    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
    fpu.mxcsr = env->mxcsr;

    return kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_FPU, &fpu);
}

static int kvm_put_sregs(CPUX86State *env)
{
    struct kvm_sregs sregs;

    memcpy(sregs.interrupt_bitmap,
           env->interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs.es, &env->segs[R_ES]);
        set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs.cs, &env->segs[R_CS]);
        set_seg(&sregs.ds, &env->segs[R_DS]);
        set_seg(&sregs.es, &env->segs[R_ES]);
        set_seg(&sregs.fs, &env->segs[R_FS]);
        set_seg(&sregs.gs, &env->segs[R_GS]);
        set_seg(&sregs.ss, &env->segs[R_SS]);

        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs.ss.selector = (sregs.ss.selector & ~3) |
                                (sregs.cs.selector & 3);
            sregs.ss.dpl = sregs.ss.selector & 3;
        }
    }

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    sregs.idt.limit = env->idt.limit;
    sregs.idt.base = env->idt.base;
    sregs.gdt.limit = env->gdt.limit;
    sregs.gdt.base = env->gdt.base;

    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    sregs.cr8 = cpu_get_apic_tpr(env);
    sregs.apic_base = cpu_get_apic_base(env);

    sregs.efer = env->efer;

    return kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_SREGS, &sregs);
}
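
/*
 * MSRs travel in batches: a struct kvm_msrs header carrying the count,
 * immediately followed by (index, data) pairs.  Illustrative layout (not
 * literal code from this file):
 *
 *   struct {
 *       struct kvm_msrs info;            // info.nmsrs = n
 *       struct kvm_msr_entry entries[n]; // entries[i] = { index, data }
 *   };
 *
 * A single KVM_SET_MSRS/KVM_GET_MSRS ioctl then moves the whole batch.
 */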
static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
                              uint32_t index, uint64_t value)
{
    entry->index = index;
    entry->data = value;
}

static int kvm_put_msrs(CPUX86State *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int n = 0;

    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    if (kvm_has_msr_star(env))
        kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    /* FIXME if lm capable */
    kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
    kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
    kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
    kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
#endif
    msr_data.info.nmsrs = n;

    return kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_MSRS, &msr_data);
}

static int kvm_get_fpu(CPUX86State *env)
{
    struct kvm_fpu fpu;
    int i, ret;

    ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_GET_FPU, &fpu);
    if (ret < 0)
        return ret;

    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    for (i = 0; i < 8; ++i)
        env->fptags[i] = !((fpu.ftwx >> i) & 1);
    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
    env->mxcsr = fpu.mxcsr;

    return 0;
}

int kvm_get_sregs(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct kvm_sregs sregs;
    uint32_t hflags;
    int ret;

    ret = kvm_vcpu_ioctl(cpu, KVM_GET_SREGS, &sregs);
    if (ret < 0)
        return ret;

    memcpy(env->interrupt_bitmap,
           sregs.interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    get_seg(&env->segs[R_CS], &sregs.cs);
    get_seg(&env->segs[R_DS], &sregs.ds);
    get_seg(&env->segs[R_ES], &sregs.es);
    get_seg(&env->segs[R_FS], &sregs.fs);
    get_seg(&env->segs[R_GS], &sregs.gs);
    get_seg(&env->segs[R_SS], &sregs.ss);

    get_seg(&env->tr, &sregs.tr);
    get_seg(&env->ldt, &sregs.ldt);

    env->idt.limit = sregs.idt.limit;
    env->idt.base = sregs.idt.base;
    env->gdt.limit = sregs.gdt.limit;
    env->gdt.base = sregs.gdt.base;

    env->cr[0] = sregs.cr0;
    env->cr[2] = sregs.cr2;
    env->cr[3] = sregs.cr3;
    env->cr[4] = sregs.cr4;

    cpu_set_apic_base(env, sregs.apic_base);

    env->efer = sregs.efer;
    //cpu_set_apic_tpr(env, sregs.cr8);

#define HFLAG_COPY_MASK ~( \
    HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
    HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
    HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
    HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)

    /* Recompute the hflags bits that are derived from the segment and
     * control register state we just fetched. */
    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
              (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
              (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;
    }

    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_SS32_SHIFT);
        if (!(env->cr[0] & CR0_PE_MASK) ||
            (env->eflags & VM_MASK) ||
            !(hflags & HF_CS32_MASK)) {
            hflags |= HF_ADDSEG_MASK;
        } else {
            hflags |= ((env->segs[R_DS].base |
                        env->segs[R_ES].base |
                        env->segs[R_SS].base) != 0) <<
                      HF_ADDSEG_SHIFT;
        }
    }
    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;

    return 0;
}
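
/*
 * KVM_GET_MSRS returns the number of entries the kernel actually filled
 * in, so the copy-back loop below iterates over 'ret' rather than the
 * number of MSRs requested.
 */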
static int kvm_get_msrs(CPUX86State *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int ret, i, n;

    n = 0;
    msrs[n++].index = MSR_IA32_SYSENTER_CS;
    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
    if (kvm_has_msr_star(env))
        msrs[n++].index = MSR_STAR;
    msrs[n++].index = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    /* FIXME lm_capable_kernel */
    msrs[n++].index = MSR_CSTAR;
    msrs[n++].index = MSR_KERNELGSBASE;
    msrs[n++].index = MSR_FMASK;
    msrs[n++].index = MSR_LSTAR;
#endif
    msr_data.info.nmsrs = n;
    ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_GET_MSRS, &msr_data);
    if (ret < 0)
        return ret;

    for (i = 0; i < ret; i++) {
        switch (msrs[i].index) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].data;
            break;
        case MSR_STAR:
            env->star = msrs[i].data;
            break;
#ifdef TARGET_X86_64
        case MSR_CSTAR:
            env->cstar = msrs[i].data;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].data;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].data;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].data;
            break;
#endif
        case MSR_IA32_TSC:
            env->tsc = msrs[i].data;
            break;
        }
    }

    return 0;
}
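
/*
 * Full state synchronization with the kernel.  Note that the put path
 * writes mp_state and then reads it straight back; presumably this keeps
 * env->mp_state coherent with whatever the kernel accepted, though that
 * rationale is an inference rather than something documented here.
 */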
int kvm_arch_put_registers(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    int ret;

    ret = kvm_getput_regs(env, 1);
    if (ret < 0)
        return ret;

    ret = kvm_put_fpu(env);
    if (ret < 0)
        return ret;

    ret = kvm_put_sregs(env);
    if (ret < 0)
        return ret;

    ret = kvm_put_msrs(env);
    if (ret < 0)
        return ret;

    ret = kvm_put_mp_state(cpu);
    if (ret < 0)
        return ret;

    ret = kvm_get_mp_state(cpu);
    if (ret < 0)
        return ret;

    return 0;
}

int kvm_arch_get_registers(CPUState *cpu)
{
    int ret;
    CPUX86State *env = cpu->env_ptr;

    ret = kvm_getput_regs(env, 0);
    if (ret < 0)
        return ret;

    ret = kvm_get_fpu(env);
    if (ret < 0)
        return ret;

    ret = kvm_get_sregs(cpu);
    if (ret < 0)
        return ret;

    ret = kvm_get_msrs(env);
    if (ret < 0)
        return ret;

    return 0;
}

int kvm_arch_vcpu_run(CPUState *cpu)
{
#ifdef CONFIG_KVM_GS_RESTORE
    if (gs_need_restore != KVM_GS_RESTORE_NO)
        return no_gs_ioctl(cpu->kvm_fd, KVM_RUN, 0);
    else
#endif
        return kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
}

int kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
{
    CPUX86State *env = cpu->env_ptr;

    /* Try to inject an interrupt if the guest can accept it */
    if (run->ready_for_interrupt_injection &&
        (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK)) {
        int irq;

        cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            struct kvm_interrupt intr;

            intr.irq = irq;
            /* FIXME: errors */
            dprintf("injected interrupt %d\n", irq);
            kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr);
        }
    }

    /* If we have an interrupt pending but the guest is not ready to receive
     * it, request an interrupt window exit.  This will cause a return to
     * userspace as soon as the guest is ready to receive interrupts. */
    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD))
        run->request_interrupt_window = 1;
    else
        run->request_interrupt_window = 0;

    dprintf("setting tpr\n");
    run->cr8 = cpu_get_apic_tpr(env);

#ifdef CONFIG_KVM_GS_RESTORE
    gs_base_pre_run();
#endif

    return 0;
}
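
/*
 * After KVM_RUN returns, mirror the volatile state the kernel reports in
 * the shared kvm_run structure back into env: the guest's IF flag, the
 * TPR (CR8) and the APIC base.
 */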
int kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
    CPUX86State *env = cpu->env_ptr;

#ifdef CONFIG_KVM_GS_RESTORE
    gs_base_post_run();
#endif
    if (run->if_flag)
        env->eflags |= IF_MASK;
    else
        env->eflags &= ~IF_MASK;

    cpu_set_apic_tpr(env, run->cr8);
    cpu_set_apic_base(env, run->apic_base);

    return 0;
}

static int kvm_handle_halt(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;

    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK)) &&
        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = 1;
        env->exception_index = EXCP_HLT;
        return 0;
    }

    return 1;
}

int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run)
{
    int ret = 0;

    switch (run->exit_reason) {
    case KVM_EXIT_HLT:
        dprintf("handle_hlt\n");
        ret = kvm_handle_halt(cpu);
        break;
    }

    return ret;
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    static const uint8_t int3 = 0xcc;

    if (cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
        cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&int3, 1, 1))
        return -EINVAL;
    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    uint8_t int3;

    if (cpu_memory_rw_debug(cpu, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
        cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1))
        return -EINVAL;
    return 0;
}

static struct {
    target_ulong addr;
    int len;
    int type;
} hw_breakpoint[4];

static int nb_hw_breakpoint;

static int find_hw_breakpoint(target_ulong addr, int len, int type)
{
    int n;

    for (n = 0; n < nb_hw_breakpoint; n++)
        if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type &&
            (hw_breakpoint[n].len == len || len == -1))
            return n;
    return -1;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        len = 1;
        break;
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        switch (len) {
        case 1:
            break;
        case 2:
        case 4:
        case 8:
            /* Watchpoints must be naturally aligned to their length. */
            if (addr & (len - 1))
                return -EINVAL;
            break;
        default:
            return -EINVAL;
        }
        break;
    default:
        return -ENOSYS;
    }

    if (nb_hw_breakpoint == 4)
        return -ENOBUFS;

    if (find_hw_breakpoint(addr, len, type) >= 0)
        return -EEXIST;

    hw_breakpoint[nb_hw_breakpoint].addr = addr;
    hw_breakpoint[nb_hw_breakpoint].len = len;
    hw_breakpoint[nb_hw_breakpoint].type = type;
    nb_hw_breakpoint++;

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type);
    if (n < 0)
        return -ENOENT;

    nb_hw_breakpoint--;
    hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = 0;
}

static CPUWatchpoint hw_watchpoint;
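
/*
 * Decode a guest #DB exit.  DR6 bit 14 (BS) flags a single-step trap;
 * DR6 bits 0-3 flag which hardware breakpoint slot fired, and the
 * corresponding R/W field in DR7 (bits 16 + 4n) says whether it was an
 * execution breakpoint (00), a write watchpoint (01) or a read/write
 * watchpoint (11).
 */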
int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
{
    int handle = 0;
    int n;

    if (arch_info->exception == 1) {
        if (arch_info->dr6 & (1 << 14)) {
            if (current_cpu->singlestep_enabled)
                handle = 1;
        } else {
            for (n = 0; n < 4; n++)
                if (arch_info->dr6 & (1 << n))
                    switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
                    case 0x0:
                        handle = 1;
                        break;
                    case 0x1:
                        handle = 1;
                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_WRITE;
                        break;
                    case 0x3:
                        handle = 1;
                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_ACCESS;
                        break;
                    }
        }
    } else if (kvm_find_sw_breakpoint(current_cpu, arch_info->pc))
        handle = 1;

    if (!handle)
        kvm_update_guest_debug(current_cpu,
                               (arch_info->exception == 1) ?
                               KVM_GUESTDBG_INJECT_DB : KVM_GUESTDBG_INJECT_BP);

    return handle;
}

void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
{
    const uint8_t type_code[] = {
        [GDB_BREAKPOINT_HW] = 0x0,
        [GDB_WATCHPOINT_WRITE] = 0x1,
        [GDB_WATCHPOINT_ACCESS] = 0x3
    };
    const uint8_t len_code[] = {
        [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
    };
    int n;

    if (kvm_sw_breakpoints_active(cpu))
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;

    if (nb_hw_breakpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        /* 0x0600 sets DR7.GE (bit 9) and the always-one reserved bit 10. */
        dbg->arch.debugreg[7] = 0x0600;
        for (n = 0; n < nb_hw_breakpoint; n++) {
            dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
            /* Global-enable bit for slot n, plus its R/W and LEN fields. */
            dbg->arch.debugreg[7] |= (2 << (n * 2)) |
                (type_code[hw_breakpoint[n].type] << (16 + n*4)) |
                (len_code[hw_breakpoint[n].len] << (18 + n*4));
        }
    }
}
#endif /* KVM_CAP_SET_GUEST_DEBUG */