/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "sysemu.h"
#include "hw/hw.h"
#include "gdbstub.h"
#include "kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

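/*
 * Each KVMSlot mirrors one kvm_userspace_memory_region registered with the
 * kernel: start_addr is the guest-physical base of the region, phys_offset
 * locates its backing store within qemu's RAM allocation, and slot is the
 * kernel's slot index.
 */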
typedef struct KVMSlot
{
    target_phys_addr_t start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
    int broken_set_mem_region;
    int migration_log;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
};

static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* Skip slots 8..11; older kernels reserve this range for
         * KVM-private memory slots (e.g. the x86 TSS region). */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}
static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         target_phys_addr_t start_addr,
                                         target_phys_addr_t end_addr)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (start_addr == mem->start_addr &&
            end_addr == mem->start_addr + mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            target_phys_addr_t start_addr,
                                            target_phys_addr_t end_addr)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        if (mem->memory_size == 0 ||
            (found && found->start_addr < mem->start_addr)) {
            continue;
        }

        if (end_addr > mem->start_addr &&
            start_addr < mem->start_addr + mem->memory_size) {
            found = mem;
        }
    }

    return found;
}

static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}


int kvm_init_vcpu(CPUState *env)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, env->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    env->kvm_fd = ret;
    env->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

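    /* The kernel shares the vcpu's kvm_run communication structure with
     * userspace by mmap'ing the vcpu fd; exit information is read from it
     * after each KVM_RUN. */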
    env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        env->kvm_fd, 0);
    if (env->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(env);

err:
    return ret;
}

int kvm_put_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state = { .mp_state = env->mp_state };

    return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, &mp_state);
}

int kvm_get_mp_state(CPUState *env)
{
    struct kvm_mp_state mp_state;
    int ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, &mp_state);
    if (ret < 0) {
        return ret;
    }
    env->mp_state = mp_state.mp_state;
    return 0;
}

int kvm_sync_vcpus(void)
{
    CPUState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        int ret;

        ret = kvm_arch_put_registers(env);
        if (ret)
            return ret;
    }

    return 0;
}

/*
 * dirty pages logging control
 */
static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, phys_addr,
                phys_addr + size - 1);
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing changed effectively, no need to issue an ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}

/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty();
 * bits in qemu's bitmap are only ever set here, never cleared.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
int kvm_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                   target_phys_addr_t end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    target_phys_addr_t phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
        if (mem == NULL) {
            break;
        }

        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (!d.dirty_bitmap) {
            d.dirty_bitmap = qemu_malloc(size);
        } else if (size > allocated_size) {
            d.dirty_bitmap = qemu_realloc(d.dirty_bitmap, size);
        }
        allocated_size = size;
        memset(d.dirty_bitmap, 0, allocated_size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) < 0) {
            dprintf("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

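        /* The kernel's bitmap has one bit per target page, indexed by the
         * page's offset from the start of the slot; propagate each set bit
         * into qemu's dirty bitmap. */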
        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr < mem->start_addr + mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;
            unsigned word = nr / (sizeof(*bitmap) * 8);
            unsigned bit = nr % (sizeof(*bitmap) * 8);

            if ((bitmap[word] >> bit) & 1) {
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
    }
    qemu_free(d.dirty_bitmap);

    return ret;
}

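/*
 * Coalesced MMIO lets the kernel batch guest writes to registered zones in
 * a ring buffer instead of exiting to userspace on every access; the ring
 * is drained in kvm_run_coalesced_mmio() below.
 */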
int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

/* Query a capability; returns the non-negative value reported by
 * KVM_CHECK_EXTENSION, or 0 if the extension is unsupported. */
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}

static void kvm_reset_vcpus(void *opaque)
{
    kvm_sync_vcpus();
}

int kvm_init(int smp_cpus)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = qemu_mallocz(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    s->vmfd = -1;
    s->fd = open("/dev/kvm", O_RDWR);
    if (s->fd == -1) {
        ret = -errno;
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

    s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    if (s->vmfd < 0) {
        ret = -errno;
        fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %s\n", strerror(errno));
        goto err;
    }

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user allocated buffer so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevents memory slots from being
     * destroyed properly.  Since we rely on this capability, refuse to work
     * with any kernel without this capability. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
#else
    s->coalesced_mmio = 0;
#endif

    /* Old kernels cannot merge or grow existing memory slots; remember
     * this so kvm_set_phys_mem() can preserve the original fragmentation. */
    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    qemu_register_reset(kvm_reset_vcpus, INT_MAX, NULL);

    kvm_state = s;

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    qemu_free(s);

    return ret;
}

    515 
    516 static int kvm_handle_io(CPUState *env, uint16_t port, void *data,
    517                          int direction, int size, uint32_t count)
    518 {
    519     int i;
    520     uint8_t *ptr = data;
    521 
    522     for (i = 0; i < count; i++) {
    523         if (direction == KVM_EXIT_IO_IN) {
    524             switch (size) {
    525             case 1:
    526                 stb_p(ptr, cpu_inb(port));
    527                 break;
    528             case 2:
    529                 stw_p(ptr, cpu_inw(port));
    530                 break;
    531             case 4:
    532                 stl_p(ptr, cpu_inl(port));
    533                 break;
    534             }
    535         } else {
    536             switch (size) {
    537             case 1:
    538                 cpu_outb(port, ldub_p(ptr));
    539                 break;
    540             case 2:
    541                 cpu_outw(port, lduw_p(ptr));
    542                 break;
    543             case 4:
    544                 cpu_outl(port, ldl_p(ptr));
    545                 break;
    546             }
    547         }
    548 
    549         ptr += size;
    550     }
    551 
    552     return 1;
    553 }
    554 
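/*
 * The coalesced MMIO ring lives in the pages right after the vcpu's
 * kvm_run area; KVM_CHECK_EXTENSION(KVM_CAP_COALESCED_MMIO) returned its
 * page offset within that mapping (stored in s->coalesced_mmio).
 */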
static void kvm_run_coalesced_mmio(CPUState *env, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring;

        ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

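/*
 * Main vcpu execution loop: a handler that returns a positive value
 * re-enters the guest, zero drops back to the qemu main loop, and fatal
 * errors abort.
 */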
int kvm_cpu_exec(CPUState *env)
{
    struct kvm_run *run = env->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        if (env->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        kvm_arch_pre_run(env, run);
        ret = kvm_arch_vcpu_run(env);
        kvm_arch_post_run(env, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(env, run);

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(env, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(env);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(env, run);
            break;
        }
    } while (ret > 0);

    if (env->exit_request) {
        env->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

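/*
 * Register, resize, or remove the KVM slots covering [start_addr,
 * start_addr + size). Overlapping slots are unregistered and re-added as
 * prefix and suffix fragments so the kernel's view matches qemu's map.
 */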
void kvm_set_phys_mem(target_phys_addr_t start_addr,
                      ram_addr_t size,
                      ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr,
                                             start_addr + size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

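/*
 * ioctl wrappers: each returns the ioctl's result on success and -errno
 * on failure, so callers can test ret < 0 uniformly.
 */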
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *env, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(env->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

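/*
 * KVM_CAP_SYNC_MMU indicates that the kernel tracks changes to the qemu
 * process address space via mmu notifiers. Without it, guest RAM must be
 * protected from copy-on-write after fork(), hence the MADV_DONTFORK in
 * kvm_setup_guest_memory() below.
 */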
int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &env->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *env)
{
    return !QTAILQ_EMPTY(&env->kvm_state->kvm_sw_breakpoints);
}

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    struct kvm_guest_debug dbg;

    dbg.control = 0;
    if (env->singlestep_enabled)
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(env, &dbg);
    dbg.control |= reinject_trap;

    return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, &dbg);
}

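/*
 * Software breakpoints are shared by all vcpus, so they live in a per-VM
 * list and are reference counted; hardware breakpoints are managed by the
 * architecture-specific code.
 */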
int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(current_env, bp);
        if (err) {
            qemu_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&current_env->kvm_state->kvm_sw_breakpoints,
                          bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    CPUState *env;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(current_env, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(current_env, bp);
        if (err)
            return err;

        QTAILQ_REMOVE(&current_env->kvm_state->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        err = kvm_update_guest_debug(env, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = current_env->kvm_state;
    CPUState *env;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            for (env = first_cpu; env != NULL; env = env->next_cpu) {
                if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
                    break;
            }
        }
        /* The breakpoint is gone from the guest; drop our record of it. */
        QTAILQ_REMOVE(&s->kvm_sw_breakpoints, bp, entry);
        qemu_free(bp);
    }
    kvm_arch_remove_all_hw_breakpoints();

    for (env = first_cpu; env != NULL; env = env->next_cpu)
        kvm_update_guest_debug(env, 0);
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *current_env)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */
   1043