/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

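/*
 * Global KVM state: the /dev/kvm and VM file descriptors plus a fixed
 * array of slots mirroring the memory regions registered with the
 * kernel.
 */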
struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
    int broken_set_mem_region;
    int migration_log;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
};

static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* KVM private memory slots */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}

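/*
 * Propagate one slot to the kernel.  A memory_size of 0 tells KVM to
 * drop the slot; dirty logging is forced on while migration logging is
 * active.
 */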
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}


int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

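    /* The kvm_run structure is shared with the kernel: mapping the vcpu
     * fd gives us exit reasons and I/O data without extra ioctls. */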
    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);

err:
    return ret;
}

int kvm_put_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state = { .mp_state = env->mp_state };

    return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
}

int kvm_get_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state;
    int ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
    if (ret < 0) {
        return ret;
    }
    env->mp_state = mp_state.mp_state;
    return 0;
}

int kvm_sync_vcpus(void)
{
    CPUState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int ret;

        ret = kvm_arch_put_registers(env);
        if (ret)
            return ret;
    }

    return 0;
}

/*
 * dirty pages logging control
 */
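/* Update the flags of the slot exactly covering [phys_addr, phys_addr + size):
 * bits in 'mask' are cleared first, then 'flags' is ORed in, so one helper
 * can both set and clear KVM_MEM_LOG_DIRTY_PAGES. */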
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                phys_addr + size - 1);
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    /* If nothing changed effectively, there is no need to issue the ioctl */
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty():
 * every page that the kernel reports as dirty is marked dirty in qemu too.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                   target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    target_phys_addr_t phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

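        /* One bit per target page, rounded up to whole bytes; the buffer
         * is reused across slots and only grown when a slot needs more. */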
        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        /* kvm_vm_ioctl() returns -errno on failure, so test for < 0,
         * not == -1 */
        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) < 0) {
            dprintf("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

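        /* Walk the slot page by page; bit nr of the bitmap corresponds
         * to the nr-th target page of the slot. */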
        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr < mem->start_addr + mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;
            unsigned word = nr / (sizeof(*bitmap) * 8);
            unsigned bit = nr % (sizeof(*bitmap) * 8);

            if ((bitmap[word] >> bit) & 1) {
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}

int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

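/*
 * Probe a capability.  KVM_CHECK_EXTENSION returns 0 if the extension
 * is unsupported and a positive, extension-specific value otherwise;
 * errors are folded into 0 so callers can treat the result as a boolean.
 */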
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}

static void kvm_reset_vcpus(void *opaque)
{
    kvm_sync_vcpus();
}

int kvm_init(int smp_cpus)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        ret = -errno;
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0) {
        /* don't leak the positive API version in ret on this path */
        ret = s->vmfd;
        goto err;
    }

    /* Initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user-allocated buffer, so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevented memory slots from
     * being destroyed properly.  Since we rely on this capability, refuse
     * to work with any kernel without this capability. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
#else
    s->coalesced_mmio = 0;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    qemu_register_reset(kvm_reset_vcpus, INT_MAX, NULL);

    kvm_state = s;

    return 0;

err:
    if (s) {
        if (s->vmfd >= 0)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}

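/*
 * Emulate a (possibly string) port I/O request from the guest: 'count'
 * accesses of 'size' bytes each, with the data packed back to back in
 * the kvm_run I/O buffer.
 */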
static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    int i;
    uint8_t *ptr = data;

    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

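/*
 * Drain the coalesced MMIO ring.  s->coalesced_mmio holds the page
 * offset (within the vcpu mapping) of the ring the kernel fills: the
 * kernel advances 'last', userspace advances 'first'.
 */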
static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring;

        ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

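/*
 * Main KVM execution loop.  Inside the loop, ret > 0 means re-enter the
 * guest, ret == 0 means return to the emulator main loop, and ret < 0
 * is a fatal error.
 */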
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        kvm_arch_pre_run(env, run);
        ret = kvm_arch_vcpu_run(env);
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(env, run);

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

void kvm_set_phys_mem(target_phys_addr_t start_addr,
                      ram_addr_t size,
                      ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

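    /* KVM cannot resize or move a slot in place: every slot that overlaps
     * the new region is deleted first, and any parts of it that stick out
     * before or after the new region are re-registered as separate slots. */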
    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, not even
         * by unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

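/*
 * Thin vararg wrappers around ioctl(2) for the three KVM fd levels
 * (system, VM, vcpu).  The optional argument, if any, is passed through
 * untouched; failures are reported as -errno rather than -1.
 */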
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

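/*
 * Without a synchronous MMU, KVM keeps its own references to guest
 * pages, so a fork() followed by copy-on-write could leave the guest
 * mapping stale.  MADV_DONTFORK keeps guest RAM out of child processes
 * to avoid that.
 */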
void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

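/*
 * Push the current debug state to the kernel.  reinject_trap is ORed
 * into dbg.control, letting callers request that a debug trap be
 * reinjected into the guest rather than handled by gdb.
 */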
int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_guest_debug dbg;

    dbg.control = 0;
    if (env->singlestep_enabled)
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &dbg);
    dbg.control |= reinject_trap;

    return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg);
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            qemu_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                           bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */