/*
 * QEMU KVM support
 *
 * Copyright IBM, Corp. 2008
 *           Red Hat, Inc. 2008
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *  Glauber Costa     <gcosta@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdarg.h>

#include <linux/kvm.h>

#include "cpu.h"
#include "qemu-common.h"
#include "sysemu/sysemu.h"
#include "hw/hw.h"
#include "android/kvm.h"
#include "exec/gdbstub.h"
#include "sysemu/kvm.h"

/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
#define PAGE_SIZE TARGET_PAGE_SIZE

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

typedef struct KVMSlot
{
    hwaddr start_addr;
    ram_addr_t memory_size;
    ram_addr_t phys_offset;
    int slot;
    int flags;
} KVMSlot;

typedef struct kvm_dirty_log KVMDirtyLog;

int kvm_allowed = 0;

struct KVMState
{
    KVMSlot slots[32];
    int fd;
    int vmfd;
    int coalesced_mmio;
    int broken_set_mem_region;
    int migration_log;
#ifdef KVM_CAP_SET_GUEST_DEBUG
    struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
#endif
};

static KVMState *kvm_state;

static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        /* Slots 8..11 are reserved as KVM private memory slots; skip them. */
        if (i >= 8 && i < 12)
            continue;
        if (s->slots[i].memory_size == 0)
            return &s->slots[i];
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}

static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
                                         hwaddr start_addr,
                                         ram_addr_t size)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];
        if (start_addr == mem->start_addr && size == mem->memory_size) {
            return mem;
        }
    }

    return NULL;
}

/*
 * Find overlapping slot with lowest start address
 */
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
                                            hwaddr start_addr,
                                            ram_addr_t size)
{
    KVMSlot *found = NULL;
    int i;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        KVMSlot *mem = &s->slots[i];

        // Skip empty slots.
        if (!mem->memory_size)
            continue;

        // Skip non-overlapping slots, conditions are:
        //    start_addr + size <= mem->start_addr ||
        //    start_addr >= mem->start_addr + mem->memory_size
        //
        // However, we want to avoid wrapping errors, so avoid
        // additions and only compare positive values.
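        //
        // For example, with start_addr = 0x1000, size = 0x1000 and a
        // slot at mem->start_addr = 0x3000, the difference 0x2000 is
        // >= size, so the slot is skipped without ever computing
        // start_addr + size (which could wrap).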
        if (start_addr <= mem->start_addr) {
            if (mem->start_addr - start_addr >= size) {
                continue;
            }
        } else if (start_addr - mem->start_addr >= mem->memory_size) {
            continue;
        }

        if (found && found->start_addr < mem->start_addr) {
            continue;
        }

        found = mem;
    }

    return found;
}

static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region mem;

    mem.slot = slot->slot;
    mem.guest_phys_addr = slot->start_addr;
    mem.memory_size = slot->memory_size;
    mem.userspace_addr = (unsigned long)qemu_get_ram_ptr(slot->phys_offset);
    mem.flags = slot->flags;
    if (s->migration_log) {
        mem.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
}


int kvm_init_vcpu(CPUState *cpu)
{
    KVMState *s = kvm_state;
    long mmap_size;
    int ret;

    dprintf("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, cpu->cpu_index);
    if (ret < 0) {
        dprintf("kvm_create_vcpu failed\n");
        goto err;
    }

    cpu->kvm_fd = ret;
    cpu->kvm_state = s;

    mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (mmap_size < 0) {
        dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n");
        goto err;
    }

    cpu->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        cpu->kvm_fd, 0);
    if (cpu->kvm_run == MAP_FAILED) {
        ret = -errno;
        dprintf("mmap'ing vcpu state failed\n");
        goto err;
    }

    ret = kvm_arch_init_vcpu(cpu);

err:
    return ret;
}

int kvm_put_mp_state(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    struct kvm_mp_state mp_state = { .mp_state = env->mp_state };

    return kvm_vcpu_ioctl(cpu, KVM_SET_MP_STATE, &mp_state);
}

int kvm_get_mp_state(CPUState *cpu)
{
    struct kvm_mp_state mp_state;
    int ret;

    ret = kvm_vcpu_ioctl(cpu, KVM_GET_MP_STATE, &mp_state);
    if (ret < 0) {
        return ret;
    }
    CPUArchState *env = cpu->env_ptr;
    env->mp_state = mp_state.mp_state;
    return 0;
}

int kvm_sync_vcpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        int ret = kvm_arch_put_registers(cpu);
        if (ret)
            return ret;
    }

    return 0;
}

/*
 * dirty pages logging control
 */
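/* Update the logging flags of the slot that exactly covers
 * [phys_addr, phys_addr + size): 'mask' selects which flag bits may
 * change and 'flags' supplies their new values. */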
static int kvm_dirty_pages_log_change(hwaddr phys_addr,
                                      ram_addr_t size, int flags, int mask)
{
    KVMState *s = kvm_state;
    KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, size);
    int old_flags;

    if (mem == NULL) {
        fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
                TARGET_FMT_plx "\n", __func__, (hwaddr)phys_addr,
                (hwaddr)(phys_addr + size - 1));
        return -EINVAL;
    }

    old_flags = mem->flags;

    flags = (mem->flags & ~mask) | flags;
    mem->flags = flags;

    /* If nothing changed effectively, no need to issue ioctl */
    if (s->migration_log) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (flags == old_flags) {
        return 0;
    }

    return kvm_set_user_memory_region(s, mem);
}

int kvm_log_start(hwaddr phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      KVM_MEM_LOG_DIRTY_PAGES,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_log_stop(hwaddr phys_addr, ram_addr_t size)
{
    return kvm_dirty_pages_log_change(phys_addr, size,
                                      0,
                                      KVM_MEM_LOG_DIRTY_PAGES);
}

int kvm_set_migration_log(int enable)
{
    KVMState *s = kvm_state;
    KVMSlot *mem;
    int i, err;

    s->migration_log = enable;

    for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
        mem = &s->slots[i];

        if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
            continue;
        }
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            return err;
        }
    }
    return 0;
}
/**
 * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
 * This function updates qemu's dirty bitmap by calling
 * cpu_physical_memory_set_dirty() for every page that KVM reports as
 * dirty, which sets all dirty-flag bits for those pages.
 *
 * @start_addr: start of logged region.
 * @end_addr: end of logged region.
 */
int kvm_physical_sync_dirty_bitmap(hwaddr start_addr,
                                   hwaddr end_addr)
{
    KVMState *s = kvm_state;
    unsigned long size, allocated_size = 0;
    hwaddr phys_addr;
    ram_addr_t addr;
    KVMDirtyLog d;
    KVMSlot *mem;
    int ret = 0;

    d.dirty_bitmap = NULL;
    while (start_addr < end_addr) {
        ram_addr_t start_size = (ram_addr_t)(end_addr - start_addr);

        mem = kvm_lookup_overlapping_slot(s, start_addr, start_size);
        if (mem == NULL) {
            break;
        }

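        /* KVM returns one bit per target page; round the bitmap size up
         * to whole bytes. */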
        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
        if (size > allocated_size) {
            d.dirty_bitmap = g_realloc(d.dirty_bitmap, size);
            allocated_size = size;
        }
        memset(d.dirty_bitmap, 0, size);

        d.slot = mem->slot;

        if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) < 0) {
            dprintf("ioctl failed %d\n", errno);
            ret = -1;
            break;
        }

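        /* Walk the slot page by page and mark every page whose bit is
         * set in the returned bitmap as dirty on the QEMU side. */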
        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
             phys_addr - mem->start_addr < mem->memory_size;
             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
            unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;
            unsigned word = nr / (sizeof(*bitmap) * 8);
            unsigned bit = nr % (sizeof(*bitmap) * 8);

            if ((bitmap[word] >> bit) & 1) {
                cpu_physical_memory_set_dirty(addr);
            }
        }
        start_addr = phys_addr;
        if (!start_addr) {
            // Handle wrap-around, which happens when a slot is mapped
            // at the end of the physical address space.
            break;
        }
    }
    g_free(d.dirty_bitmap);

    return ret;
}

int kvm_coalesce_mmio_region(hwaddr start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

int kvm_uncoalesce_mmio_region(hwaddr start, ram_addr_t size)
{
    int ret = -ENOSYS;
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;

    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_zone zone;

        zone.addr = start;
        zone.size = size;

        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
    }
#endif

    return ret;
}

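/* Returns 0 when the extension is unsupported (ioctl errors are folded
 * into "unsupported") and a positive, extension-specific value when it
 * is supported. */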
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret;

    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);
    if (ret < 0) {
        ret = 0;
    }

    return ret;
}

static void kvm_reset_vcpus(void *opaque)
{
    kvm_sync_vcpus();
}

int kvm_init(int smp_cpus)
{
    static const char upgrade_note[] =
        "Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
        "(see http://sourceforge.net/projects/kvm).\n";
    KVMState *s;
    int ret;
    int i;

    if (smp_cpus > 1) {
        fprintf(stderr, "No SMP KVM support, use '-smp 1'\n");
        return -EINVAL;
    }

    s = g_malloc0(sizeof(KVMState));

#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif
    for (i = 0; i < ARRAY_SIZE(s->slots); i++)
        s->slots[i].slot = i;

    char* kvm_device = getenv(KVM_DEVICE_NAME_ENV);
    if (NULL == kvm_device) {
        kvm_device = "/dev/kvm";
    }

    s->vmfd = -1;
    s->fd = open(kvm_device, O_RDWR);
    if (s->fd == -1) {
        ret = -errno;
        fprintf(stderr, "Could not access KVM kernel module: %m\n");
        goto err;
    }

    ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
    if (ret < KVM_API_VERSION) {
        if (ret > 0)
            ret = -EINVAL;
        fprintf(stderr, "kvm version too old\n");
        goto err;
    }

    if (ret > KVM_API_VERSION) {
        ret = -EINVAL;
        fprintf(stderr, "kvm version not supported\n");
        goto err;
    }

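    /* KVM_CREATE_VM can be interrupted by a signal before it completes;
     * retry while the ioctl fails with EINTR or EAGAIN. */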
    do {
        s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0);
    } while (s->vmfd < 0 && (EINTR == errno || EAGAIN == errno));

    if (s->vmfd < 0) {
        ret = -errno;
        fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %d %s\n", errno, strerror(errno));
        goto err;
    }

    /* initially, KVM allocated its own memory and we had to jump through
     * hoops to make phys_ram_base point to this.  Modern versions of KVM
     * just use a user allocated buffer so we can use regular pages
     * unmodified.  Make sure we have a sufficiently modern version of KVM.
     */
    if (!kvm_check_extension(s, KVM_CAP_USER_MEMORY)) {
        ret = -EINVAL;
        fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n%s",
                upgrade_note);
        goto err;
    }

    /* There was a nasty bug in < kvm-80 that prevents memory slots from being
     * destroyed properly.  Since we rely on this capability, refuse to work
     * with any kernel without this capability. */
    if (!kvm_check_extension(s, KVM_CAP_DESTROY_MEMORY_REGION_WORKS)) {
        ret = -EINVAL;

        fprintf(stderr,
                "KVM kernel module broken (DESTROY_MEMORY_REGION).\n%s",
                upgrade_note);
        goto err;
    }

#ifdef KVM_CAP_COALESCED_MMIO
    s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
#else
    s->coalesced_mmio = 0;
#endif

    s->broken_set_mem_region = 1;
#ifdef KVM_CAP_JOIN_MEMORY_REGIONS_WORKS
    ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
    if (ret > 0) {
        s->broken_set_mem_region = 0;
    }
#endif

    ret = kvm_arch_init(s, smp_cpus);
    if (ret < 0)
        goto err;

    qemu_register_reset(kvm_reset_vcpus, INT_MAX, NULL);

    kvm_state = s;

    return 0;

err:
    if (s) {
        if (s->vmfd != -1)
            close(s->vmfd);
        if (s->fd != -1)
            close(s->fd);
    }
    g_free(s);

    return ret;
}

static int kvm_handle_io(CPUState *cpu, uint16_t port, void *data,
                         int direction, int size, uint32_t count)
{
    int i;
    uint8_t *ptr = data;

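    /* String PIO: the kvm_run buffer holds 'count' consecutive items of
     * 'size' bytes each; replay every item through the CPU port helpers. */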
    for (i = 0; i < count; i++) {
        if (direction == KVM_EXIT_IO_IN) {
            switch (size) {
            case 1:
                stb_p(ptr, cpu_inb(port));
                break;
            case 2:
                stw_p(ptr, cpu_inw(port));
                break;
            case 4:
                stl_p(ptr, cpu_inl(port));
                break;
            }
        } else {
            switch (size) {
            case 1:
                cpu_outb(port, ldub_p(ptr));
                break;
            case 2:
                cpu_outw(port, lduw_p(ptr));
                break;
            case 4:
                cpu_outl(port, ldl_p(ptr));
                break;
            }
        }

        ptr += size;
    }

    return 1;
}

static void kvm_run_coalesced_mmio(CPUState *cpu, struct kvm_run *run)
{
#ifdef KVM_CAP_COALESCED_MMIO
    KVMState *s = kvm_state;
    if (s->coalesced_mmio) {
        struct kvm_coalesced_mmio_ring *ring;

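        /* For KVM_CAP_COALESCED_MMIO, KVM_CHECK_EXTENSION returns the
         * page offset of the MMIO ring inside the vcpu mmap area, so
         * s->coalesced_mmio doubles as that offset here. */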
        ring = (void *)run + (s->coalesced_mmio * TARGET_PAGE_SIZE);
        while (ring->first != ring->last) {
            struct kvm_coalesced_mmio *ent;

            ent = &ring->coalesced_mmio[ring->first];

            cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
            /* FIXME smp_wmb() */
            ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
        }
    }
#endif
}

int kvm_cpu_exec(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;
    struct kvm_run *run = cpu->kvm_run;
    int ret;

    dprintf("kvm_cpu_exec()\n");

    do {
        if (cpu->exit_request) {
            dprintf("interrupt exit requested\n");
            ret = 0;
            break;
        }

        kvm_arch_pre_run(cpu, run);
        ret = kvm_arch_vcpu_run(cpu);
        kvm_arch_post_run(cpu, run);

        if (ret == -EINTR || ret == -EAGAIN) {
            dprintf("io window exit\n");
            ret = 0;
            break;
        }

        if (ret < 0) {
            dprintf("kvm run failed %s\n", strerror(-ret));
            abort();
        }

        kvm_run_coalesced_mmio(cpu, run);

        ret = 0; /* exit loop */
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            dprintf("handle_io\n");
            ret = kvm_handle_io(cpu, run->io.port,
                                (uint8_t *)run + run->io.data_offset,
                                run->io.direction,
                                run->io.size,
                                run->io.count);
            break;
        case KVM_EXIT_MMIO:
            dprintf("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 1;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            dprintf("irq_window_open\n");
            break;
        case KVM_EXIT_SHUTDOWN:
            dprintf("shutdown\n");
            qemu_system_reset_request();
            ret = 1;
            break;
        case KVM_EXIT_UNKNOWN:
            dprintf("kvm_exit_unknown\n");
            break;
        case KVM_EXIT_FAIL_ENTRY:
            dprintf("kvm_exit_fail_entry\n");
            break;
        case KVM_EXIT_EXCEPTION:
            dprintf("kvm_exit_exception\n");
            break;
        case KVM_EXIT_DEBUG:
            dprintf("kvm_exit_debug\n");
#ifdef KVM_CAP_SET_GUEST_DEBUG
            if (kvm_arch_debug(&run->debug.arch)) {
                gdb_set_stop_cpu(cpu);
                vm_stop(EXCP_DEBUG);
                env->exception_index = EXCP_DEBUG;
                return 0;
            }
            /* re-enter, this exception was guest-internal */
            ret = 1;
#endif /* KVM_CAP_SET_GUEST_DEBUG */
            break;
        default:
            dprintf("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(cpu, run);
            break;
        }
    } while (ret > 0);

    if (cpu->exit_request) {
        cpu->exit_request = 0;
        env->exception_index = EXCP_INTERRUPT;
    }

    return ret;
}

void kvm_set_phys_mem(hwaddr start_addr,
                      ram_addr_t size,
                      ram_addr_t phys_offset)
{
    KVMState *s = kvm_state;
    ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
    KVMSlot *mem, old;
    int err;

    if (start_addr & ~TARGET_PAGE_MASK) {
        if (flags >= IO_MEM_UNASSIGNED) {
            if (!kvm_lookup_overlapping_slot(s, start_addr, size)) {
                return;
            }
            fprintf(stderr, "Unaligned split of a KVM memory slot\n");
        } else {
            fprintf(stderr, "Only page-aligned memory slots supported\n");
        }
        abort();
    }

    /* KVM does not support read-only slots */
    phys_offset &= ~IO_MEM_ROM;

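    /* Unregister every slot overlapping the new range, then re-register
     * the non-overlapping prefix/suffix pieces; the loop ends once no
     * overlapping slot is left. */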
    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, size);
        if (!mem) {
            break;
        }

        if (flags < IO_MEM_UNASSIGNED && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (phys_offset - start_addr == mem->phys_offset - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - nothing to be done. */
            return;
        }

        old = *mem;

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size &&
            flags < IO_MEM_UNASSIGNED) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            start_addr += old.memory_size;
            phys_offset += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->phys_offset = old.phys_offset;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->phys_offset = old.phys_offset + size_delta;
            mem->flags = 0;

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size)
        return;

    /* KVM does not need to know about this memory */
    if (flags >= IO_MEM_UNASSIGNED)
        return;

    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->phys_offset = phys_offset;
    mem->flags = 0;

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}

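/* Thin vararg wrappers around ioctl() for the three KVM file
 * descriptors: the system fd (kvm_ioctl), the VM fd (kvm_vm_ioctl) and
 * a vcpu fd (kvm_vcpu_ioctl). Each folds failure into -errno. */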
int kvm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(s->vmfd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
{
    int ret;
    void *arg;
    va_list ap;

    va_start(ap, type);
    arg = va_arg(ap, void *);
    va_end(ap);

    ret = ioctl(cpu->kvm_fd, type, arg);
    if (ret == -1)
        ret = -errno;

    return ret;
}

int kvm_has_sync_mmu(void)
{
#ifdef KVM_CAP_SYNC_MMU
    KVMState *s = kvm_state;

    return kvm_check_extension(s, KVM_CAP_SYNC_MMU);
#else
    return 0;
#endif
}

void kvm_setup_guest_memory(void *start, size_t size)
{
    if (!kvm_has_sync_mmu()) {
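        /* Without a synchronous in-kernel MMU, guest pages must stay
         * stable across fork(); MADV_DONTFORK keeps them out of any
         * child's address space. */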
#ifdef MADV_DONTFORK
        int ret = madvise(start, size, MADV_DONTFORK);

        if (ret) {
            perror("madvise");
            exit(1);
        }
#else
        fprintf(stderr,
                "Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
        exit(1);
#endif
    }
}

#ifdef KVM_CAP_SET_GUEST_DEBUG
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
                                                 target_ulong pc)
{
    struct kvm_sw_breakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->kvm_state->kvm_sw_breakpoints, entry) {
        if (bp->pc == pc)
            return bp;
    }
    return NULL;
}

int kvm_sw_breakpoints_active(CPUState *cpu)
{
    return !QTAILQ_EMPTY(&cpu->kvm_state->kvm_sw_breakpoints);
}

int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
{
    struct kvm_guest_debug dbg;

    dbg.control = 0;
    if (cpu->singlestep_enabled)
        dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;

    kvm_arch_update_guest_debug(cpu, &dbg);
    dbg.control |= reinject_trap;

    return kvm_vcpu_ioctl(cpu, KVM_SET_GUEST_DEBUG, &dbg);
}

int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(cpu, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        bp = g_malloc(sizeof(struct kvm_sw_breakpoint));
        if (!bp)
            return -ENOMEM;

        bp->pc = addr;
        bp->use_count = 1;
        err = kvm_arch_insert_sw_breakpoint(cpu, bp);
        if (err) {
            g_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&cpu->kvm_state->kvm_sw_breakpoints,
                           bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    CPU_FOREACH(cpu) {
        err = kvm_update_guest_debug(cpu, 0);
        if (err)
            return err;
    }
    return 0;
}

int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(cpu, addr);
        if (!bp)
            return -ENOENT;

        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(cpu, bp);
        if (err)
            return err;

        QTAILQ_REMOVE(&cpu->kvm_state->kvm_sw_breakpoints, bp, entry);
        g_free(bp);
    } else {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err)
            return err;
    }

    CPU_FOREACH(cpu) {
        err = kvm_update_guest_debug(cpu, 0);
        if (err)
            return err;
    }
    return 0;
}

void kvm_remove_all_breakpoints(CPUState *cpu)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = cpu->kvm_state;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(cpu, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            CPU_FOREACH(cpu) {
                if (kvm_arch_remove_sw_breakpoint(cpu, bp) == 0)
                    break;
            }
        }
    }
    kvm_arch_remove_all_hw_breakpoints();

    CPU_FOREACH(cpu) {
        kvm_update_guest_debug(cpu, 0);
    }
}

#else /* !KVM_CAP_SET_GUEST_DEBUG */

int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
{
    return -EINVAL;
}

int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_remove_all_breakpoints(CPUState *cpu)
{
}
#endif /* !KVM_CAP_SET_GUEST_DEBUG */