/*
 * QEMU KVM support
 *
 * Copyright (C) 2006-2008 Qumranet Technologies
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#undef __user
#define __user  /* nothing */
#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "cpu.h"
#include "exec/gdbstub.h"

#ifdef CONFIG_KVM_GS_RESTORE
#include "kvm-gs-restore.h"
#endif

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#ifdef KVM_CAP_EXT_CPUID

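/* Fetch the host KVM's supported-CPUID table into a buffer sized for 'max'
 * entries.  A completely filled buffer is treated like -E2BIG and NULL is
 * returned so the caller can retry with a bigger buffer; any other error is
 * fatal. */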
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
{
    struct kvm_cpuid2 *cpuid;
    int r, size;

    size = sizeof(*cpuid) + max * sizeof(*cpuid->entries);
    cpuid = (struct kvm_cpuid2 *)g_malloc0(size);
    cpuid->nent = max;
    r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid);
    if (r == 0 && cpuid->nent >= max) {
        r = -E2BIG;
    }
    if (r < 0) {
        if (r == -E2BIG) {
            g_free(cpuid);
            return NULL;
        } else {
            fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n",
                    strerror(-r));
            exit(1);
        }
    }
    return cpuid;
}

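/* Report which feature bits KVM can virtualize for a given CPUID function.
 * The supported-CPUID table is fetched with a geometrically growing buffer
 * until it fits.  For the EDX register of function 0x80000001, the bits that
 * AMD defines but Intel mirrors from function 1 are filled in from leaf 1. */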
uint32_t kvm_arch_get_supported_cpuid(CPUState *cpu, uint32_t function, int reg)
{
    struct kvm_cpuid2 *cpuid;
    int i, max;
    uint32_t ret = 0;
    uint32_t cpuid_1_edx;

    if (!kvm_check_extension(cpu->kvm_state, KVM_CAP_EXT_CPUID)) {
        return -1U;
    }

    max = 1;
    while ((cpuid = try_get_cpuid(cpu->kvm_state, max)) == NULL) {
        max *= 2;
    }

    for (i = 0; i < cpuid->nent; ++i) {
        if (cpuid->entries[i].function == function) {
            switch (reg) {
            case R_EAX:
                ret = cpuid->entries[i].eax;
                break;
            case R_EBX:
                ret = cpuid->entries[i].ebx;
                break;
            case R_ECX:
                ret = cpuid->entries[i].ecx;
                break;
            case R_EDX:
                ret = cpuid->entries[i].edx;
                if (function == 0x80000001) {
                    /* On Intel, kvm returns cpuid according to the Intel spec,
                     * so add missing bits according to the AMD spec:
                     */
                    cpuid_1_edx = kvm_arch_get_supported_cpuid(cpu, 1, R_EDX);
                    ret |= cpuid_1_edx & 0xdfeff7ff;
                }
                break;
            }
        }
    }

    g_free(cpuid);

    return ret;
}

#else

uint32_t kvm_arch_get_supported_cpuid(CPUState *cpu, uint32_t function, int reg)
{
    return -1U;
}

#endif

#ifndef KVM_MP_STATE_RUNNABLE
#define KVM_MP_STATE_RUNNABLE 0
#endif

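/* Build the CPUID table the guest will see and hand it to KVM with
 * KVM_SET_CPUID2.  Leaf 2 is a stateful function (EAX[7:0] says how many
 * times it must be read), leaves 4, 0xb and 0xd take a significant index and
 * are enumerated until their terminating condition, and everything else is
 * copied verbatim from QEMU's CPUID model.  The entries[] array is a fixed
 * 100 slots, which is assumed to be enough for all leaves. */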
int kvm_arch_init_vcpu(CPUState *cpu)
{
    struct {
        struct kvm_cpuid2 cpuid;
        struct kvm_cpuid_entry2 entries[100];
    } __attribute__((packed)) cpuid_data;
    uint32_t limit, i, j, cpuid_i;
    uint32_t unused;
    CPUX86State *env = cpu->env_ptr;

    env->mp_state = KVM_MP_STATE_RUNNABLE;

    cpuid_i = 0;

    cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused);

    for (i = 0; i <= limit; i++) {
        struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];

        switch (i) {
        case 2: {
            /* Keep reading function 2 till all the input is received */
            int times;

            c->function = i;
            c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC |
                       KVM_CPUID_FLAG_STATE_READ_NEXT;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            times = c->eax & 0xff;

            for (j = 1; j < times; ++j) {
                c = &cpuid_data.entries[cpuid_i++];
                c->function = i;
                c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC;
                cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            }
            break;
        }
        case 4:
        case 0xb:
        case 0xd:
            for (j = 0; ; j++) {
                c->function = i;
                c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                c->index = j;
                cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx);

                if (i == 4 && c->eax == 0)
                    break;
                if (i == 0xb && !(c->ecx & 0xff00))
                    break;
                if (i == 0xd && c->eax == 0)
                    break;

                c = &cpuid_data.entries[cpuid_i++];
            }
            break;
        default:
            c->function = i;
            c->flags = 0;
            cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
            break;
        }
    }
    cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused);

    for (i = 0x80000000; i <= limit; i++) {
        struct kvm_cpuid_entry2 *c = &cpuid_data.entries[cpuid_i++];

        c->function = i;
        c->flags = 0;
        cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx);
    }

    cpuid_data.cpuid.nent = cpuid_i;

    return kvm_vcpu_ioctl(cpu, KVM_SET_CPUID2, &cpuid_data);
}

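/* Probe once, caching the answer in a static, whether the host kernel lists
 * MSR_STAR in KVM_GET_MSR_INDEX_LIST.  The MSR save/restore code below only
 * includes MSR_STAR when this returns 1. */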
static int kvm_has_msr_star(CPUX86State *env)
{
    static int has_msr_star;
    int ret;
    CPUState *cpu = ENV_GET_CPU(env);

    /* first time */
    if (has_msr_star == 0) {
        struct kvm_msr_list msr_list, *kvm_msr_list;

        has_msr_star = -1;

        /* Obtain MSR list from KVM.  These are the MSRs that we must
         * save/restore */
        msr_list.nmsrs = 0;
        ret = kvm_ioctl(cpu->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
        if (ret < 0)
            return 0;

        kvm_msr_list = g_malloc0(sizeof(msr_list) +
                                 msr_list.nmsrs * sizeof(msr_list.indices[0]));

        kvm_msr_list->nmsrs = msr_list.nmsrs;
        ret = kvm_ioctl(cpu->kvm_state, KVM_GET_MSR_INDEX_LIST, kvm_msr_list);
        if (ret >= 0) {
            int i;

            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                if (kvm_msr_list->indices[i] == MSR_STAR) {
                    has_msr_star = 1;
                    break;
                }
            }
        }

        g_free(kvm_msr_list);
    }

    if (has_msr_star == 1)
        return 1;
    return 0;
}

int kvm_arch_init(KVMState *s, int smp_cpus)
{
    int ret;

    /* create vm86 tss.  KVM uses vm86 mode to emulate 16-bit code
     * directly.  In order to use vm86 mode, a TSS is needed.  Since this
     * must be part of guest physical memory, we need to allocate it.  Older
     * versions of KVM just assumed that it would be at the end of physical
     * memory but that doesn't work with more than 4GB of memory.  We simply
     * refuse to work with those older versions of KVM. */
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
    if (ret <= 0) {
        fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n");
        return ret;
    }

    /* this address is 3 pages before the bios, and the bios should present
     * it as unavailable memory.  FIXME, need to ensure the e820 map deals
     * with this?
     */
    return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, 0xfffbd000);
}

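/* Helpers translating between QEMU's SegmentCache and KVM's kvm_segment.
 * set_v8086_seg() builds the fixed attributes virtual-8086 mode requires,
 * set_seg() unpacks the descriptor flags QEMU keeps in SegmentCache.flags,
 * and get_seg() packs a kvm_segment back into that flags word. */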
static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = 3;
    lhs->present = 1;
    lhs->dpl = 3;
    lhs->db = 0;
    lhs->s = 1;
    lhs->l = 0;
    lhs->g = 0;
    lhs->avl = 0;
    lhs->unusable = 0;
}

static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
{
    unsigned flags = rhs->flags;
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
    lhs->present = (flags & DESC_P_MASK) != 0;
    lhs->dpl = rhs->selector & 3;
    lhs->db = (flags >> DESC_B_SHIFT) & 1;
    lhs->s = (flags & DESC_S_MASK) != 0;
    lhs->l = (flags >> DESC_L_SHIFT) & 1;
    lhs->g = (flags & DESC_G_MASK) != 0;
    lhs->avl = (flags & DESC_AVL_MASK) != 0;
    lhs->unusable = 0;
}

static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
{
    lhs->selector = rhs->selector;
    lhs->base = rhs->base;
    lhs->limit = rhs->limit;
    lhs->flags =
        (rhs->type << DESC_TYPE_SHIFT)
        | (rhs->present * DESC_P_MASK)
        | (rhs->dpl << DESC_DPL_SHIFT)
        | (rhs->db << DESC_B_SHIFT)
        | (rhs->s * DESC_S_MASK)
        | (rhs->l << DESC_L_SHIFT)
        | (rhs->g * DESC_G_MASK)
        | (rhs->avl * DESC_AVL_MASK);
}

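/* General-purpose register sync.  A single helper handles both directions:
 * a non-zero 'set' copies QEMU state into the kvm_regs structure
 * (KVM_SET_REGS), zero copies the KVM_GET_REGS result back into QEMU. */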
static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set)
{
    if (set)
        *kvm_reg = *qemu_reg;
    else
        *qemu_reg = *kvm_reg;
}

static int kvm_getput_regs(CPUX86State *env, int set)
{
    struct kvm_regs regs;
    int ret = 0;

    if (!set) {
        ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_GET_REGS, &regs);
        if (ret < 0)
            return ret;
    }

    kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set);
    kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set);
    kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set);
    kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set);
    kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set);
    kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set);
    kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set);
    kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set);
#ifdef TARGET_X86_64
    kvm_getput_reg(&regs.r8, &env->regs[8], set);
    kvm_getput_reg(&regs.r9, &env->regs[9], set);
    kvm_getput_reg(&regs.r10, &env->regs[10], set);
    kvm_getput_reg(&regs.r11, &env->regs[11], set);
    kvm_getput_reg(&regs.r12, &env->regs[12], set);
    kvm_getput_reg(&regs.r13, &env->regs[13], set);
    kvm_getput_reg(&regs.r14, &env->regs[14], set);
    kvm_getput_reg(&regs.r15, &env->regs[15], set);
#endif

    kvm_getput_reg(&regs.rflags, &env->eflags, set);
    kvm_getput_reg(&regs.rip, &env->eip, set);

    if (set)
        ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_REGS, &regs);

    return ret;
}

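/* Push QEMU's FPU/SSE state to KVM.  The x87 top-of-stack index lives in
 * bits 11-13 of the status word, and ftwx uses the abridged tag format (one
 * bit per register, 1 = valid), hence the inverted fptags[] below. */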
static int kvm_put_fpu(CPUX86State *env)
{
    struct kvm_fpu fpu;
    int i;

    memset(&fpu, 0, sizeof fpu);
    fpu.fsw = env->fpus & ~(7 << 11);
    fpu.fsw |= (env->fpstt & 7) << 11;
    fpu.fcw = env->fpuc;
    for (i = 0; i < 8; ++i)
        fpu.ftwx |= (!env->fptags[i]) << i;
    memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs);
    memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs);
    fpu.mxcsr = env->mxcsr;

    return kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_FPU, &fpu);
}

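/* Push segment, descriptor-table, control-register and APIC state to KVM.
 * Virtual-8086 mode uses the fixed v86 segment attributes; in protected mode
 * the SS selector RPL and DPL are forced to the CS privilege level. */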
static int kvm_put_sregs(CPUX86State *env)
{
    struct kvm_sregs sregs;

    memcpy(sregs.interrupt_bitmap,
           env->interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    if ((env->eflags & VM_MASK)) {
        set_v8086_seg(&sregs.cs, &env->segs[R_CS]);
        set_v8086_seg(&sregs.ds, &env->segs[R_DS]);
        set_v8086_seg(&sregs.es, &env->segs[R_ES]);
        set_v8086_seg(&sregs.fs, &env->segs[R_FS]);
        set_v8086_seg(&sregs.gs, &env->segs[R_GS]);
        set_v8086_seg(&sregs.ss, &env->segs[R_SS]);
    } else {
        set_seg(&sregs.cs, &env->segs[R_CS]);
        set_seg(&sregs.ds, &env->segs[R_DS]);
        set_seg(&sregs.es, &env->segs[R_ES]);
        set_seg(&sregs.fs, &env->segs[R_FS]);
        set_seg(&sregs.gs, &env->segs[R_GS]);
        set_seg(&sregs.ss, &env->segs[R_SS]);

        if (env->cr[0] & CR0_PE_MASK) {
            /* force ss cpl to cs cpl */
            sregs.ss.selector = (sregs.ss.selector & ~3) |
                                (sregs.cs.selector & 3);
            sregs.ss.dpl = sregs.ss.selector & 3;
        }
    }

    set_seg(&sregs.tr, &env->tr);
    set_seg(&sregs.ldt, &env->ldt);

    sregs.idt.limit = env->idt.limit;
    sregs.idt.base = env->idt.base;
    sregs.gdt.limit = env->gdt.limit;
    sregs.gdt.base = env->gdt.base;

    sregs.cr0 = env->cr[0];
    sregs.cr2 = env->cr[2];
    sregs.cr3 = env->cr[3];
    sregs.cr4 = env->cr[4];

    sregs.cr8 = cpu_get_apic_tpr(env);
    sregs.apic_base = cpu_get_apic_base(env);

    sregs.efer = env->efer;

    return kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_SREGS, &sregs);
}

static void kvm_msr_entry_set(struct kvm_msr_entry *entry,
                              uint32_t index, uint64_t value)
{
    entry->index = index;
    entry->data = value;
}

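/* Write the guest MSRs that QEMU tracks: the SYSENTER trio, the TSC,
 * MSR_STAR when the host exposes it, and on x86-64 the syscall/swapgs MSRs
 * (CSTAR, KERNELGSBASE, FMASK, LSTAR).  KVM_SET_MSRS takes the whole batch
 * in a single ioctl. */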
static int kvm_put_msrs(CPUX86State *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int n = 0;

    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    if (kvm_has_msr_star(env))
        kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
#ifdef TARGET_X86_64
    /* FIXME if lm capable */
    kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
    kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
    kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
    kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
#endif
    msr_data.info.nmsrs = n;

    return kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_SET_MSRS, &msr_data);
}

static int kvm_get_fpu(CPUX86State *env)
{
    struct kvm_fpu fpu;
    int i, ret;

    ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_GET_FPU, &fpu);
    if (ret < 0)
        return ret;

    env->fpstt = (fpu.fsw >> 11) & 7;
    env->fpus = fpu.fsw;
    env->fpuc = fpu.fcw;
    for (i = 0; i < 8; ++i)
        env->fptags[i] = !((fpu.ftwx >> i) & 1);
    memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs);
    memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs);
    env->mxcsr = fpu.mxcsr;

    return 0;
}

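/* Read segment, descriptor-table, control-register and APIC state back from
 * KVM, then recompute env->hflags: CPL, PE/MP/EM/TS, TF/VM/IOPL, OSFXSR,
 * long-mode and code/stack-size bits, and ADDSEG are all derived again from
 * the freshly fetched registers, while the remaining hflag bits are kept. */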
int kvm_get_sregs(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    struct kvm_sregs sregs;
    uint32_t hflags;
    int ret;

    ret = kvm_vcpu_ioctl(cpu, KVM_GET_SREGS, &sregs);
    if (ret < 0)
        return ret;

    memcpy(env->interrupt_bitmap,
           sregs.interrupt_bitmap,
           sizeof(sregs.interrupt_bitmap));

    get_seg(&env->segs[R_CS], &sregs.cs);
    get_seg(&env->segs[R_DS], &sregs.ds);
    get_seg(&env->segs[R_ES], &sregs.es);
    get_seg(&env->segs[R_FS], &sregs.fs);
    get_seg(&env->segs[R_GS], &sregs.gs);
    get_seg(&env->segs[R_SS], &sregs.ss);

    get_seg(&env->tr, &sregs.tr);
    get_seg(&env->ldt, &sregs.ldt);

    env->idt.limit = sregs.idt.limit;
    env->idt.base = sregs.idt.base;
    env->gdt.limit = sregs.gdt.limit;
    env->gdt.base = sregs.gdt.base;

    env->cr[0] = sregs.cr0;
    env->cr[2] = sregs.cr2;
    env->cr[3] = sregs.cr3;
    env->cr[4] = sregs.cr4;

    cpu_set_apic_base(env, sregs.apic_base);

    env->efer = sregs.efer;
    //cpu_set_apic_tpr(env, sregs.cr8);

#define HFLAG_COPY_MASK ~( \
            HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
            HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
            HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
            HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)

    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
              (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
              (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;
    }

    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
                  (DESC_B_SHIFT - HF_SS32_SHIFT);
        if (!(env->cr[0] & CR0_PE_MASK) ||
            (env->eflags & VM_MASK) ||
            !(hflags & HF_CS32_MASK)) {
            hflags |= HF_ADDSEG_MASK;
        } else {
            hflags |= ((env->segs[R_DS].base |
                        env->segs[R_ES].base |
                        env->segs[R_SS].base) != 0) <<
                      HF_ADDSEG_SHIFT;
        }
    }
    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;

    return 0;
}

static int kvm_get_msrs(CPUX86State *env)
{
    struct {
        struct kvm_msrs info;
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int ret, i, n;

    n = 0;
    msrs[n++].index = MSR_IA32_SYSENTER_CS;
    msrs[n++].index = MSR_IA32_SYSENTER_ESP;
    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
    if (kvm_has_msr_star(env))
        msrs[n++].index = MSR_STAR;
    msrs[n++].index = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    /* FIXME lm_capable_kernel */
    msrs[n++].index = MSR_CSTAR;
    msrs[n++].index = MSR_KERNELGSBASE;
    msrs[n++].index = MSR_FMASK;
    msrs[n++].index = MSR_LSTAR;
#endif
    msr_data.info.nmsrs = n;
    ret = kvm_vcpu_ioctl(ENV_GET_CPU(env), KVM_GET_MSRS, &msr_data);
    if (ret < 0)
        return ret;

    for (i = 0; i < ret; i++) {
        switch (msrs[i].index) {
        case MSR_IA32_SYSENTER_CS:
            env->sysenter_cs = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_ESP:
            env->sysenter_esp = msrs[i].data;
            break;
        case MSR_IA32_SYSENTER_EIP:
            env->sysenter_eip = msrs[i].data;
            break;
        case MSR_STAR:
            env->star = msrs[i].data;
            break;
#ifdef TARGET_X86_64
        case MSR_CSTAR:
            env->cstar = msrs[i].data;
            break;
        case MSR_KERNELGSBASE:
            env->kernelgsbase = msrs[i].data;
            break;
        case MSR_FMASK:
            env->fmask = msrs[i].data;
            break;
        case MSR_LSTAR:
            env->lstar = msrs[i].data;
            break;
#endif
        case MSR_IA32_TSC:
            env->tsc = msrs[i].data;
            break;
        }
    }

    return 0;
}

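/* Full CPU state sync with the kernel.  "put" pushes GPRs, FPU, sregs, MSRs
 * and MP state into KVM (re-reading the MP state afterwards); "get" pulls
 * the same pieces back in the opposite direction. */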
int kvm_arch_put_registers(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;
    int ret;

    ret = kvm_getput_regs(env, 1);
    if (ret < 0)
        return ret;

    ret = kvm_put_fpu(env);
    if (ret < 0)
        return ret;

    ret = kvm_put_sregs(env);
    if (ret < 0)
        return ret;

    ret = kvm_put_msrs(env);
    if (ret < 0)
        return ret;

    ret = kvm_put_mp_state(cpu);
    if (ret < 0)
        return ret;

    ret = kvm_get_mp_state(cpu);
    if (ret < 0)
        return ret;

    return 0;
}

int kvm_arch_get_registers(CPUState *cpu)
{
    int ret;
    CPUX86State *env = cpu->env_ptr;

    ret = kvm_getput_regs(env, 0);
    if (ret < 0)
        return ret;

    ret = kvm_get_fpu(env);
    if (ret < 0)
        return ret;

    ret = kvm_get_sregs(cpu);
    if (ret < 0)
        return ret;

    ret = kvm_get_msrs(env);
    if (ret < 0)
        return ret;

    return 0;
}

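/* Enter the guest.  With CONFIG_KVM_GS_RESTORE the KVM_RUN ioctl goes
 * through no_gs_ioctl() from kvm-gs-restore.h, which appears to work around
 * host kernels that do not restore the GS base correctly around KVM_RUN. */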
int kvm_arch_vcpu_run(CPUState *cpu)
{
#ifdef CONFIG_KVM_GS_RESTORE
    if (gs_need_restore != KVM_GS_RESTORE_NO)
        return no_gs_ioctl(cpu->kvm_fd, KVM_RUN, 0);
    else
#endif
        return kvm_vcpu_ioctl(cpu, KVM_RUN, 0);
}

int kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
{
    CPUX86State *env = cpu->env_ptr;

    /* Try to inject an interrupt if the guest can accept it */
    if (run->ready_for_interrupt_injection &&
        (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK)) {
        int irq;

        cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
        irq = cpu_get_pic_interrupt(env);
        if (irq >= 0) {
            struct kvm_interrupt intr;
            intr.irq = irq;
            /* FIXME: errors */
            dprintf("injected interrupt %d\n", irq);
            kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr);
        }
    }

    /* If we have an interrupt but the guest is not ready to receive an
     * interrupt, request an interrupt window exit.  This will
     * cause a return to userspace as soon as the guest is ready to
     * receive interrupts. */
    if ((cpu->interrupt_request & CPU_INTERRUPT_HARD))
        run->request_interrupt_window = 1;
    else
        run->request_interrupt_window = 0;

    dprintf("setting tpr\n");
    run->cr8 = cpu_get_apic_tpr(env);

#ifdef CONFIG_KVM_GS_RESTORE
    gs_base_pre_run();
#endif

    return 0;
}

int kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
    CPUX86State *env = cpu->env_ptr;
#ifdef CONFIG_KVM_GS_RESTORE
    gs_base_post_run();
#endif
    if (run->if_flag)
        env->eflags |= IF_MASK;
    else
        env->eflags &= ~IF_MASK;

    cpu_set_apic_tpr(env, run->cr8);
    cpu_set_apic_base(env, run->apic_base);

    return 0;
}

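/* A HLT exit only really halts the vcpu when nothing can wake it
 * immediately: no deliverable pending hard interrupt and no pending NMI.
 * In that case the CPU is marked halted with EXCP_HLT and 0 is returned;
 * otherwise 1 is returned so execution continues. */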
static int kvm_handle_halt(CPUState *cpu)
{
    CPUX86State *env = cpu->env_ptr;

    if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
          (env->eflags & IF_MASK)) &&
        !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = 1;
        env->exception_index = EXCP_HLT;
        return 0;
    }

    return 1;
}

int kvm_arch_handle_exit(CPUState *cpu, struct kvm_run *run)
{
    int ret = 0;

    switch (run->exit_reason) {
    case KVM_EXIT_HLT:
        dprintf("handle_hlt\n");
        ret = kvm_handle_halt(cpu);
        break;
    }

    return ret;
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
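/* Guest debugging support.  Software breakpoints are planted by saving the
 * original byte at bp->pc and writing an int3 (0xcc) opcode in its place;
 * removal verifies that the 0xcc is still there before restoring the saved
 * instruction byte. */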
int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    static const uint8_t int3 = 0xcc;

    if (cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) ||
        cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&int3, 1, 1))
        return -EINVAL;
    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    uint8_t int3;

    if (cpu_memory_rw_debug(cpu, bp->pc, &int3, 1, 0) || int3 != 0xcc ||
        cpu_memory_rw_debug(cpu, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1))
        return -EINVAL;
    return 0;
}

static struct {
    target_ulong addr;
    int len;
    int type;
} hw_breakpoint[4];

static int nb_hw_breakpoint;

static int find_hw_breakpoint(target_ulong addr, int len, int type)
{
    int n;

    for (n = 0; n < nb_hw_breakpoint; n++)
        if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type &&
            (hw_breakpoint[n].len == len || len == -1))
            return n;
    return -1;
}

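/* Hardware breakpoints and watchpoints map onto the four x86 debug
 * registers: execution breakpoints always use length 1, and watchpoints of
 * 2, 4 or 8 bytes must be naturally aligned.  More than four active entries
 * cannot be expressed, hence -ENOBUFS. */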
int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        len = 1;
        break;
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        switch (len) {
        case 1:
            break;
        case 2:
        case 4:
        case 8:
            if (addr & (len - 1))
                return -EINVAL;
            break;
        default:
            return -EINVAL;
        }
        break;
    default:
        return -ENOSYS;
    }

    if (nb_hw_breakpoint == 4)
        return -ENOBUFS;

    if (find_hw_breakpoint(addr, len, type) >= 0)
        return -EEXIST;

    hw_breakpoint[nb_hw_breakpoint].addr = addr;
    hw_breakpoint[nb_hw_breakpoint].len = len;
    hw_breakpoint[nb_hw_breakpoint].type = type;
    nb_hw_breakpoint++;

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type);
    if (n < 0)
        return -ENOENT;

    nb_hw_breakpoint--;
    hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = 0;
}

static CPUWatchpoint hw_watchpoint;

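/* Decide whether a #DB/#BP exit belongs to us.  DR6 bit 14 (BS) means a
 * single step we requested; the low DR6 bits identify which debug register
 * fired, and the matching R/W field in DR7 tells execution breakpoints
 * apart from write or access watchpoints.  Unclaimed exceptions are
 * reinjected into the guest. */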
int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
{
    int handle = 0;
    int n;

    if (arch_info->exception == 1) {
        if (arch_info->dr6 & (1 << 14)) {
            if (current_cpu->singlestep_enabled)
                handle = 1;
        } else {
            for (n = 0; n < 4; n++)
                if (arch_info->dr6 & (1 << n))
                    switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) {
                    case 0x0:
                        handle = 1;
                        break;
                    case 0x1:
                        handle = 1;
                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_WRITE;
                        break;
                    case 0x3:
                        handle = 1;
                        cpu_single_env->watchpoint_hit = &hw_watchpoint;
                        hw_watchpoint.vaddr = hw_breakpoint[n].addr;
                        hw_watchpoint.flags = BP_MEM_ACCESS;
                        break;
                    }
        }
    } else if (kvm_find_sw_breakpoint(current_cpu, arch_info->pc))
        handle = 1;

    if (!handle)
        kvm_update_guest_debug(current_cpu,
                               (arch_info->exception == 1) ?
                               KVM_GUESTDBG_INJECT_DB : KVM_GUESTDBG_INJECT_BP);

    return handle;
}

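/* Translate QEMU's breakpoint lists into a kvm_guest_debug request.  DR7 is
 * seeded with 0x0600, then each slot n gets its global-enable bit (2 << 2n)
 * plus the type and length fields at bit positions 16+4n and 18+4n.
 * len_code follows the hardware length encoding, where 0x2 means 8 bytes
 * and 0x3 means 4 bytes. */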
void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
{
    const uint8_t type_code[] = {
        [GDB_BREAKPOINT_HW] = 0x0,
        [GDB_WATCHPOINT_WRITE] = 0x1,
        [GDB_WATCHPOINT_ACCESS] = 0x3
    };
    const uint8_t len_code[] = {
        [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2
    };
    int n;

    if (kvm_sw_breakpoints_active(cpu))
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;

    if (nb_hw_breakpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        dbg->arch.debugreg[7] = 0x0600;
        for (n = 0; n < nb_hw_breakpoint; n++) {
            dbg->arch.debugreg[n] = hw_breakpoint[n].addr;
            dbg->arch.debugreg[7] |= (2 << (n * 2)) |
                (type_code[hw_breakpoint[n].type] << (16 + n*4)) |
                (len_code[hw_breakpoint[n].len] << (18 + n*4));
        }
    }
}
#endif /* KVM_CAP_SET_GUEST_DEBUG */