1 /* 2 * QEMU KVM support 3 * 4 * Copyright (C) 2006-2008 Qumranet Technologies 5 * Copyright IBM, Corp. 2008 6 * 7 * Authors: 8 * Anthony Liguori <aliguori (at) us.ibm.com> 9 * 10 * This work is licensed under the terms of the GNU GPL, version 2 or later. 11 * See the COPYING file in the top-level directory. 12 * 13 */ 14 15 #include <sys/types.h> 16 #include <sys/ioctl.h> 17 #include <sys/mman.h> 18 19 #undef __user 20 #define __xuser /* nothing */ 21 #include <linux/kvm.h> 22 23 #include "qemu-common.h" 24 #include "sysemu.h" 25 #include "kvm.h" 26 #include "cpu.h" 27 #include "gdbstub.h" 28 29 #ifdef CONFIG_KVM_GS_RESTORE 30 #include "kvm-gs-restore.h" 31 #endif 32 33 //#define DEBUG_KVM 34 35 #ifdef DEBUG_KVM 36 #define dprintf(fmt, ...) \ 37 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) 38 #else 39 #define dprintf(fmt, ...) \ 40 do { } while (0) 41 #endif 42 43 #ifdef KVM_CAP_EXT_CPUID 44 45 static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) 46 { 47 struct kvm_cpuid2 *cpuid; 48 int r, size; 49 50 size = sizeof(*cpuid) + max * sizeof(*cpuid->entries); 51 cpuid = (struct kvm_cpuid2 *)qemu_mallocz(size); 52 cpuid->nent = max; 53 r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid); 54 if (r == 0 && cpuid->nent >= max) { 55 r = -E2BIG; 56 } 57 if (r < 0) { 58 if (r == -E2BIG) { 59 qemu_free(cpuid); 60 return NULL; 61 } else { 62 fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n", 63 strerror(-r)); 64 exit(1); 65 } 66 } 67 return cpuid; 68 } 69 70 uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg) 71 { 72 struct kvm_cpuid2 *cpuid; 73 int i, max; 74 uint32_t ret = 0; 75 uint32_t cpuid_1_edx; 76 77 if (!kvm_check_extension(env->kvm_state, KVM_CAP_EXT_CPUID)) { 78 return -1U; 79 } 80 81 max = 1; 82 while ((cpuid = try_get_cpuid(env->kvm_state, max)) == NULL) { 83 max *= 2; 84 } 85 86 for (i = 0; i < cpuid->nent; ++i) { 87 if (cpuid->entries[i].function == function) { 88 switch (reg) { 89 case R_EAX: 90 ret = cpuid->entries[i].eax; 91 break; 92 case R_EBX: 93 ret = cpuid->entries[i].ebx; 94 break; 95 case R_ECX: 96 ret = cpuid->entries[i].ecx; 97 break; 98 case R_EDX: 99 ret = cpuid->entries[i].edx; 100 if (function == 0x80000001) { 101 /* On Intel, kvm returns cpuid according to the Intel spec, 102 * so add missing bits according to the AMD spec: 103 */ 104 cpuid_1_edx = kvm_arch_get_supported_cpuid(env, 1, R_EDX); 105 ret |= cpuid_1_edx & 0xdfeff7ff; 106 } 107 break; 108 } 109 } 110 } 111 112 qemu_free(cpuid); 113 114 return ret; 115 } 116 117 #else 118 119 uint32_t kvm_arch_get_supported_cpuid(CPUState *env, uint32_t function, int reg) 120 { 121 return -1U; 122 } 123 124 #endif 125 126 #ifndef KVM_MP_STATE_RUNNABLE 127 #define KVM_MP_STATE_RUNNABLE 0 128 #endif 129 130 int kvm_arch_init_vcpu(CPUState *env) 131 { 132 struct { 133 struct kvm_cpuid2 cpuid; 134 struct kvm_cpuid_entry2 entries[100]; 135 } __attribute__((packed)) cpuid_data; 136 uint32_t limit, i, j, cpuid_i; 137 uint32_t unused; 138 139 env->mp_state = KVM_MP_STATE_RUNNABLE; 140 141 cpuid_i = 0; 142 143 cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); 144 145 for (i = 0; i <= limit; i++) { 146 struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++]; 147 148 switch (i) { 149 case 2: { 150 /* Keep reading function 2 till all the input is received */ 151 int times; 152 153 c->function = i; 154 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC | 155 KVM_CPUID_FLAG_STATE_READ_NEXT; 156 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); 157 times = c->eax & 0xff; 158 159 for (j = 1; j < times; ++j) { 160 c = &cpuid_data.entries[cpuid_i++]; 161 c->function = i; 162 c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; 163 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); 164 } 165 break; 166 } 167 case 4: 168 case 0xb: 169 case 0xd: 170 for (j = 0; ; j++) { 171 c->function = i; 172 c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; 173 c->index = j; 174 cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); 175 176 if (i == 4 && c->eax == 0) 177 break; 178 if (i == 0xb && !(c->ecx & 0xff00)) 179 break; 180 if (i == 0xd && c->eax == 0) 181 break; 182 183 c = &cpuid_data.entries[cpuid_i++]; 184 } 185 break; 186 default: 187 c->function = i; 188 c->flags = 0; 189 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); 190 break; 191 } 192 } 193 cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); 194 195 for (i = 0x80000000; i <= limit; i++) { 196 struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++]; 197 198 c->function = i; 199 c->flags = 0; 200 cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); 201 } 202 203 cpuid_data.cpuid.nent = cpuid_i; 204 205 return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data); 206 } 207 208 static int kvm_has_msr_star(CPUState *env) 209 { 210 static int has_msr_star; 211 int ret; 212 213 /* first time */ 214 if (has_msr_star == 0) { 215 struct kvm_msr_list msr_list, *kvm_msr_list; 216 217 has_msr_star = -1; 218 219 /* Obtain MSR list from KVM. These are the MSRs that we must 220 * save/restore */ 221 msr_list.nmsrs = 0; 222 ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list); 223 if (ret < 0) 224 return 0; 225 226 kvm_msr_list = qemu_mallocz(sizeof(msr_list) + 227 msr_list.nmsrs * sizeof(msr_list.indices[0])); 228 229 kvm_msr_list->nmsrs = msr_list.nmsrs; 230 ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list); 231 if (ret >= 0) { 232 int i; 233 234 for (i = 0; i < kvm_msr_list->nmsrs; i++) { 235 if (kvm_msr_list->indices[i] == MSR_STAR) { 236 has_msr_star = 1; 237 break; 238 } 239 } 240 } 241 242 free(kvm_msr_list); 243 } 244 245 if (has_msr_star == 1) 246 return 1; 247 return 0; 248 } 249 250 int kvm_arch_init(KVMState *s, int smp_cpus) 251 { 252 int ret; 253 254 /* create vm86 tss. KVM uses vm86 mode to emulate 16-bit code 255 * directly. In order to use vm86 mode, a TSS is needed. Since this 256 * must be part of guest physical memory, we need to allocate it. Older 257 * versions of KVM just assumed that it would be at the end of physical 258 * memory but that doesn't work with more than 4GB of memory. We simply 259 * refuse to work with those older versions of KVM. */ 260 ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); 261 if (ret <= 0) { 262 fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n"); 263 return ret; 264 } 265 266 /* this address is 3 pages before the bios, and the bios should present 267 * as unavaible memory. FIXME, need to ensure the e820 map deals with 268 * this? 269 */ 270 return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000); 271 } 272 273 static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs) 274 { 275 lhs->selector = rhs->selector; 276 lhs->base = rhs->base; 277 lhs->limit = rhs->limit; 278 lhs->type = 3; 279 lhs->present = 1; 280 lhs->dpl = 3; 281 lhs->db = 0; 282 lhs->s = 1; 283 lhs->l = 0; 284 lhs->g = 0; 285 lhs->avl = 0; 286 lhs->unusable = 0; 287 } 288 289 static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs) 290 { 291 unsigned flags = rhs->flags; 292 lhs->selector = rhs->selector; 293 lhs->base = rhs->base; 294 lhs->limit = rhs->limit; 295 lhs->type = (flags >> DESC_TYPE_SHIFT) & 15; 296 lhs->present = (flags & DESC_P_MASK) != 0; 297 lhs->dpl = rhs->selector & 3; 298 lhs->db = (flags >> DESC_B_SHIFT) & 1; 299 lhs->s = (flags & DESC_S_MASK) != 0; 300 lhs->l = (flags >> DESC_L_SHIFT) & 1; 301 lhs->g = (flags & DESC_G_MASK) != 0; 302 lhs->avl = (flags & DESC_AVL_MASK) != 0; 303 lhs->unusable = 0; 304 } 305 306 static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs) 307 { 308 lhs->selector = rhs->selector; 309 lhs->base = rhs->base; 310 lhs->limit = rhs->limit; 311 lhs->flags = 312 (rhs->type << DESC_TYPE_SHIFT) 313 | (rhs->present * DESC_P_MASK) 314 | (rhs->dpl << DESC_DPL_SHIFT) 315 | (rhs->db << DESC_B_SHIFT) 316 | (rhs->s * DESC_S_MASK) 317 | (rhs->l << DESC_L_SHIFT) 318 | (rhs->g * DESC_G_MASK) 319 | (rhs->avl * DESC_AVL_MASK); 320 } 321 322 static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set) 323 { 324 if (set) 325 *kvm_reg = *qemu_reg; 326 else 327 *qemu_reg = *kvm_reg; 328 } 329 330 static int kvm_getput_regs(CPUState *env, int set) 331 { 332 struct kvm_regs regs; 333 int ret = 0; 334 335 if (!set) { 336 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, ®s); 337 if (ret < 0) 338 return ret; 339 } 340 341 kvm_getput_reg(®s.rax, &env->regs[R_EAX], set); 342 kvm_getput_reg(®s.rbx, &env->regs[R_EBX], set); 343 kvm_getput_reg(®s.rcx, &env->regs[R_ECX], set); 344 kvm_getput_reg(®s.rdx, &env->regs[R_EDX], set); 345 kvm_getput_reg(®s.rsi, &env->regs[R_ESI], set); 346 kvm_getput_reg(®s.rdi, &env->regs[R_EDI], set); 347 kvm_getput_reg(®s.rsp, &env->regs[R_ESP], set); 348 kvm_getput_reg(®s.rbp, &env->regs[R_EBP], set); 349 #ifdef TARGET_X86_64 350 kvm_getput_reg(®s.r8, &env->regs[8], set); 351 kvm_getput_reg(®s.r9, &env->regs[9], set); 352 kvm_getput_reg(®s.r10, &env->regs[10], set); 353 kvm_getput_reg(®s.r11, &env->regs[11], set); 354 kvm_getput_reg(®s.r12, &env->regs[12], set); 355 kvm_getput_reg(®s.r13, &env->regs[13], set); 356 kvm_getput_reg(®s.r14, &env->regs[14], set); 357 kvm_getput_reg(®s.r15, &env->regs[15], set); 358 #endif 359 360 kvm_getput_reg(®s.rflags, &env->eflags, set); 361 kvm_getput_reg(®s.rip, &env->eip, set); 362 363 if (set) 364 ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, ®s); 365 366 return ret; 367 } 368 369 static int kvm_put_fpu(CPUState *env) 370 { 371 struct kvm_fpu fpu; 372 int i; 373 374 memset(&fpu, 0, sizeof fpu); 375 fpu.fsw = env->fpus & ~(7 << 11); 376 fpu.fsw |= (env->fpstt & 7) << 11; 377 fpu.fcw = env->fpuc; 378 for (i = 0; i < 8; ++i) 379 fpu.ftwx |= (!env->fptags[i]) << i; 380 memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs); 381 memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs); 382 fpu.mxcsr = env->mxcsr; 383 384 return kvm_vcpu_ioctl(env, KVM_SET_FPU, &fpu); 385 } 386 387 static int kvm_put_sregs(CPUState *env) 388 { 389 struct kvm_sregs sregs; 390 391 memcpy(sregs.interrupt_bitmap, 392 env->interrupt_bitmap, 393 sizeof(sregs.interrupt_bitmap)); 394 395 if ((env->eflags & VM_MASK)) { 396 set_v8086_seg(&sregs.cs, &env->segs[R_CS]); 397 set_v8086_seg(&sregs.ds, &env->segs[R_DS]); 398 set_v8086_seg(&sregs.es, &env->segs[R_ES]); 399 set_v8086_seg(&sregs.fs, &env->segs[R_FS]); 400 set_v8086_seg(&sregs.gs, &env->segs[R_GS]); 401 set_v8086_seg(&sregs.ss, &env->segs[R_SS]); 402 } else { 403 set_seg(&sregs.cs, &env->segs[R_CS]); 404 set_seg(&sregs.ds, &env->segs[R_DS]); 405 set_seg(&sregs.es, &env->segs[R_ES]); 406 set_seg(&sregs.fs, &env->segs[R_FS]); 407 set_seg(&sregs.gs, &env->segs[R_GS]); 408 set_seg(&sregs.ss, &env->segs[R_SS]); 409 410 if (env->cr[0] & CR0_PE_MASK) { 411 /* force ss cpl to cs cpl */ 412 sregs.ss.selector = (sregs.ss.selector & ~3) | 413 (sregs.cs.selector & 3); 414 sregs.ss.dpl = sregs.ss.selector & 3; 415 } 416 } 417 418 set_seg(&sregs.tr, &env->tr); 419 set_seg(&sregs.ldt, &env->ldt); 420 421 sregs.idt.limit = env->idt.limit; 422 sregs.idt.base = env->idt.base; 423 sregs.gdt.limit = env->gdt.limit; 424 sregs.gdt.base = env->gdt.base; 425 426 sregs.cr0 = env->cr[0]; 427 sregs.cr2 = env->cr[2]; 428 sregs.cr3 = env->cr[3]; 429 sregs.cr4 = env->cr[4]; 430 431 sregs.cr8 = cpu_get_apic_tpr(env); 432 sregs.apic_base = cpu_get_apic_base(env); 433 434 sregs.efer = env->efer; 435 436 return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs); 437 } 438 439 static void kvm_msr_entry_set(struct kvm_msr_entry *entry, 440 uint32_t index, uint64_t value) 441 { 442 entry->index = index; 443 entry->data = value; 444 } 445 446 static int kvm_put_msrs(CPUState *env) 447 { 448 struct { 449 struct kvm_msrs info; 450 struct kvm_msr_entry entries[100]; 451 } msr_data; 452 struct kvm_msr_entry *msrs = msr_data.entries; 453 int n = 0; 454 455 kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs); 456 kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp); 457 kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); 458 if (kvm_has_msr_star(env)) 459 kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); 460 kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); 461 #ifdef TARGET_X86_64 462 /* FIXME if lm capable */ 463 kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); 464 kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase); 465 kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask); 466 kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); 467 #endif 468 msr_data.info.nmsrs = n; 469 470 return kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data); 471 472 } 473 474 475 static int kvm_get_fpu(CPUState *env) 476 { 477 struct kvm_fpu fpu; 478 int i, ret; 479 480 ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu); 481 if (ret < 0) 482 return ret; 483 484 env->fpstt = (fpu.fsw >> 11) & 7; 485 env->fpus = fpu.fsw; 486 env->fpuc = fpu.fcw; 487 for (i = 0; i < 8; ++i) 488 env->fptags[i] = !((fpu.ftwx >> i) & 1); 489 memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs); 490 memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs); 491 env->mxcsr = fpu.mxcsr; 492 493 return 0; 494 } 495 496 static int kvm_get_sregs(CPUState *env) 497 { 498 struct kvm_sregs sregs; 499 uint32_t hflags; 500 int ret; 501 502 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs); 503 if (ret < 0) 504 return ret; 505 506 memcpy(env->interrupt_bitmap, 507 sregs.interrupt_bitmap, 508 sizeof(sregs.interrupt_bitmap)); 509 510 get_seg(&env->segs[R_CS], &sregs.cs); 511 get_seg(&env->segs[R_DS], &sregs.ds); 512 get_seg(&env->segs[R_ES], &sregs.es); 513 get_seg(&env->segs[R_FS], &sregs.fs); 514 get_seg(&env->segs[R_GS], &sregs.gs); 515 get_seg(&env->segs[R_SS], &sregs.ss); 516 517 get_seg(&env->tr, &sregs.tr); 518 get_seg(&env->ldt, &sregs.ldt); 519 520 env->idt.limit = sregs.idt.limit; 521 env->idt.base = sregs.idt.base; 522 env->gdt.limit = sregs.gdt.limit; 523 env->gdt.base = sregs.gdt.base; 524 525 env->cr[0] = sregs.cr0; 526 env->cr[2] = sregs.cr2; 527 env->cr[3] = sregs.cr3; 528 env->cr[4] = sregs.cr4; 529 530 cpu_set_apic_base(env, sregs.apic_base); 531 532 env->efer = sregs.efer; 533 //cpu_set_apic_tpr(env, sregs.cr8); 534 535 #define HFLAG_COPY_MASK ~( \ 536 HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \ 537 HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \ 538 HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ 539 HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) 540 541 542 543 hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; 544 hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT); 545 hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) & 546 (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); 547 hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK)); 548 hflags |= (env->cr[4] & CR4_OSFXSR_MASK) << 549 (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT); 550 551 if (env->efer & MSR_EFER_LMA) { 552 hflags |= HF_LMA_MASK; 553 } 554 555 if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) { 556 hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK; 557 } else { 558 hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >> 559 (DESC_B_SHIFT - HF_CS32_SHIFT); 560 hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >> 561 (DESC_B_SHIFT - HF_SS32_SHIFT); 562 if (!(env->cr[0] & CR0_PE_MASK) || 563 (env->eflags & VM_MASK) || 564 !(hflags & HF_CS32_MASK)) { 565 hflags |= HF_ADDSEG_MASK; 566 } else { 567 hflags |= ((env->segs[R_DS].base | 568 env->segs[R_ES].base | 569 env->segs[R_SS].base) != 0) << 570 HF_ADDSEG_SHIFT; 571 } 572 } 573 env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags; 574 575 return 0; 576 } 577 578 static int kvm_get_msrs(CPUState *env) 579 { 580 struct { 581 struct kvm_msrs info; 582 struct kvm_msr_entry entries[100]; 583 } msr_data; 584 struct kvm_msr_entry *msrs = msr_data.entries; 585 int ret, i, n; 586 587 n = 0; 588 msrs[n++].index = MSR_IA32_SYSENTER_CS; 589 msrs[n++].index = MSR_IA32_SYSENTER_ESP; 590 msrs[n++].index = MSR_IA32_SYSENTER_EIP; 591 if (kvm_has_msr_star(env)) 592 msrs[n++].index = MSR_STAR; 593 msrs[n++].index = MSR_IA32_TSC; 594 #ifdef TARGET_X86_64 595 /* FIXME lm_capable_kernel */ 596 msrs[n++].index = MSR_CSTAR; 597 msrs[n++].index = MSR_KERNELGSBASE; 598 msrs[n++].index = MSR_FMASK; 599 msrs[n++].index = MSR_LSTAR; 600 #endif 601 msr_data.info.nmsrs = n; 602 ret = kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data); 603 if (ret < 0) 604 return ret; 605 606 for (i = 0; i < ret; i++) { 607 switch (msrs[i].index) { 608 case MSR_IA32_SYSENTER_CS: 609 env->sysenter_cs = msrs[i].data; 610 break; 611 case MSR_IA32_SYSENTER_ESP: 612 env->sysenter_esp = msrs[i].data; 613 break; 614 case MSR_IA32_SYSENTER_EIP: 615 env->sysenter_eip = msrs[i].data; 616 break; 617 case MSR_STAR: 618 env->star = msrs[i].data; 619 break; 620 #ifdef TARGET_X86_64 621 case MSR_CSTAR: 622 env->cstar = msrs[i].data; 623 break; 624 case MSR_KERNELGSBASE: 625 env->kernelgsbase = msrs[i].data; 626 break; 627 case MSR_FMASK: 628 env->fmask = msrs[i].data; 629 break; 630 case MSR_LSTAR: 631 env->lstar = msrs[i].data; 632 break; 633 #endif 634 case MSR_IA32_TSC: 635 env->tsc = msrs[i].data; 636 break; 637 } 638 } 639 640 return 0; 641 } 642 643 int kvm_arch_put_registers(CPUState *env) 644 { 645 int ret; 646 647 ret = kvm_getput_regs(env, 1); 648 if (ret < 0) 649 return ret; 650 651 ret = kvm_put_fpu(env); 652 if (ret < 0) 653 return ret; 654 655 ret = kvm_put_sregs(env); 656 if (ret < 0) 657 return ret; 658 659 ret = kvm_put_msrs(env); 660 if (ret < 0) 661 return ret; 662 663 ret = kvm_put_mp_state(env); 664 if (ret < 0) 665 return ret; 666 667 ret = kvm_get_mp_state(env); 668 if (ret < 0) 669 return ret; 670 671 return 0; 672 } 673 674 int kvm_arch_get_registers(CPUState *env) 675 { 676 int ret; 677 678 ret = kvm_getput_regs(env, 0); 679 if (ret < 0) 680 return ret; 681 682 ret = kvm_get_fpu(env); 683 if (ret < 0) 684 return ret; 685 686 ret = kvm_get_sregs(env); 687 if (ret < 0) 688 return ret; 689 690 ret = kvm_get_msrs(env); 691 if (ret < 0) 692 return ret; 693 694 return 0; 695 } 696 697 int kvm_arch_vcpu_run(CPUState *env) 698 { 699 #ifdef CONFIG_KVM_GS_RESTORE 700 if (gs_need_restore != KVM_GS_RESTORE_NO) 701 return no_gs_ioctl(env->kvm_fd, KVM_RUN, 0); 702 else 703 #endif 704 return kvm_vcpu_ioctl(env, KVM_RUN, 0); 705 } 706 707 int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) 708 { 709 /* Try to inject an interrupt if the guest can accept it */ 710 if (run->ready_for_interrupt_injection && 711 (env->interrupt_request & CPU_INTERRUPT_HARD) && 712 (env->eflags & IF_MASK)) { 713 int irq; 714 715 env->interrupt_request &= ~CPU_INTERRUPT_HARD; 716 irq = cpu_get_pic_interrupt(env); 717 if (irq >= 0) { 718 struct kvm_interrupt intr; 719 intr.irq = irq; 720 /* FIXME: errors */ 721 dprintf("injected interrupt %d\n", irq); 722 kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr); 723 } 724 } 725 726 /* If we have an interrupt but the guest is not ready to receive an 727 * interrupt, request an interrupt window exit. This will 728 * cause a return to userspace as soon as the guest is ready to 729 * receive interrupts. */ 730 if ((env->interrupt_request & CPU_INTERRUPT_HARD)) 731 run->request_interrupt_window = 1; 732 else 733 run->request_interrupt_window = 0; 734 735 dprintf("setting tpr\n"); 736 run->cr8 = cpu_get_apic_tpr(env); 737 738 #ifdef CONFIG_KVM_GS_RESTORE 739 gs_base_pre_run(); 740 #endif 741 742 return 0; 743 } 744 745 int kvm_arch_post_run(CPUState *env, struct kvm_run *run) 746 { 747 #ifdef CONFIG_KVM_GS_RESTORE 748 gs_base_post_run(); 749 #endif 750 if (run->if_flag) 751 env->eflags |= IF_MASK; 752 else 753 env->eflags &= ~IF_MASK; 754 755 cpu_set_apic_tpr(env, run->cr8); 756 cpu_set_apic_base(env, run->apic_base); 757 758 return 0; 759 } 760 761 static int kvm_handle_halt(CPUState *env) 762 { 763 if (!((env->interrupt_request & CPU_INTERRUPT_HARD) && 764 (env->eflags & IF_MASK)) && 765 !(env->interrupt_request & CPU_INTERRUPT_NMI)) { 766 env->halted = 1; 767 env->exception_index = EXCP_HLT; 768 return 0; 769 } 770 771 return 1; 772 } 773 774 int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run) 775 { 776 int ret = 0; 777 778 switch (run->exit_reason) { 779 case KVM_EXIT_HLT: 780 dprintf("handle_hlt\n"); 781 ret = kvm_handle_halt(env); 782 break; 783 } 784 785 return ret; 786 } 787 788 #ifdef KVM_CAP_SET_GUEST_DEBUG 789 int kvm_arch_insert_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp) 790 { 791 const static uint8_t int3 = 0xcc; 792 793 if (cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) || 794 cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&int3, 1, 1)) 795 return -EINVAL; 796 return 0; 797 } 798 799 int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp) 800 { 801 uint8_t int3; 802 803 if (cpu_memory_rw_debug(env, bp->pc, &int3, 1, 0) || int3 != 0xcc || 804 cpu_memory_rw_debug(env, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) 805 return -EINVAL; 806 return 0; 807 } 808 809 static struct { 810 target_ulong addr; 811 int len; 812 int type; 813 } hw_breakpoint[4]; 814 815 static int nb_hw_breakpoint; 816 817 static int find_hw_breakpoint(target_ulong addr, int len, int type) 818 { 819 int n; 820 821 for (n = 0; n < nb_hw_breakpoint; n++) 822 if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type && 823 (hw_breakpoint[n].len == len || len == -1)) 824 return n; 825 return -1; 826 } 827 828 int kvm_arch_insert_hw_breakpoint(target_ulong addr, 829 target_ulong len, int type) 830 { 831 switch (type) { 832 case GDB_BREAKPOINT_HW: 833 len = 1; 834 break; 835 case GDB_WATCHPOINT_WRITE: 836 case GDB_WATCHPOINT_ACCESS: 837 switch (len) { 838 case 1: 839 break; 840 case 2: 841 case 4: 842 case 8: 843 if (addr & (len - 1)) 844 return -EINVAL; 845 break; 846 default: 847 return -EINVAL; 848 } 849 break; 850 default: 851 return -ENOSYS; 852 } 853 854 if (nb_hw_breakpoint == 4) 855 return -ENOBUFS; 856 857 if (find_hw_breakpoint(addr, len, type) >= 0) 858 return -EEXIST; 859 860 hw_breakpoint[nb_hw_breakpoint].addr = addr; 861 hw_breakpoint[nb_hw_breakpoint].len = len; 862 hw_breakpoint[nb_hw_breakpoint].type = type; 863 nb_hw_breakpoint++; 864 865 return 0; 866 } 867 868 int kvm_arch_remove_hw_breakpoint(target_ulong addr, 869 target_ulong len, int type) 870 { 871 int n; 872 873 n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type); 874 if (n < 0) 875 return -ENOENT; 876 877 nb_hw_breakpoint--; 878 hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint]; 879 880 return 0; 881 } 882 883 void kvm_arch_remove_all_hw_breakpoints(void) 884 { 885 nb_hw_breakpoint = 0; 886 } 887 888 static CPUWatchpoint hw_watchpoint; 889 890 int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info) 891 { 892 int handle = 0; 893 int n; 894 895 if (arch_info->exception == 1) { 896 if (arch_info->dr6 & (1 << 14)) { 897 if (cpu_single_env->singlestep_enabled) 898 handle = 1; 899 } else { 900 for (n = 0; n < 4; n++) 901 if (arch_info->dr6 & (1 << n)) 902 switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) { 903 case 0x0: 904 handle = 1; 905 break; 906 case 0x1: 907 handle = 1; 908 cpu_single_env->watchpoint_hit = &hw_watchpoint; 909 hw_watchpoint.vaddr = hw_breakpoint[n].addr; 910 hw_watchpoint.flags = BP_MEM_WRITE; 911 break; 912 case 0x3: 913 handle = 1; 914 cpu_single_env->watchpoint_hit = &hw_watchpoint; 915 hw_watchpoint.vaddr = hw_breakpoint[n].addr; 916 hw_watchpoint.flags = BP_MEM_ACCESS; 917 break; 918 } 919 } 920 } else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info->pc)) 921 handle = 1; 922 923 if (!handle) 924 kvm_update_guest_debug(cpu_single_env, 925 (arch_info->exception == 1) ? 926 KVM_GUESTDBG_INJECT_DB : KVM_GUESTDBG_INJECT_BP); 927 928 return handle; 929 } 930 931 void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) 932 { 933 const uint8_t type_code[] = { 934 [GDB_BREAKPOINT_HW] = 0x0, 935 [GDB_WATCHPOINT_WRITE] = 0x1, 936 [GDB_WATCHPOINT_ACCESS] = 0x3 937 }; 938 const uint8_t len_code[] = { 939 [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2 940 }; 941 int n; 942 943 if (kvm_sw_breakpoints_active(env)) 944 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; 945 946 if (nb_hw_breakpoint > 0) { 947 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; 948 dbg->arch.debugreg[7] = 0x0600; 949 for (n = 0; n < nb_hw_breakpoint; n++) { 950 dbg->arch.debugreg[n] = hw_breakpoint[n].addr; 951 dbg->arch.debugreg[7] |= (2 << (n * 2)) | 952 (type_code[hw_breakpoint[n].type] << (16 + n*4)) | 953 (len_code[hw_breakpoint[n].len] << (18 + n*4)); 954 } 955 } 956 } 957 #endif /* KVM_CAP_SET_GUEST_DEBUG */ 958