Home | History | Annotate | Download | only in qemu
      1 /*
      2  *  KQEMU support
      3  *
      4  *  Copyright (c) 2005-2008 Fabrice Bellard
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Lesser General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Lesser General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Lesser General Public
     17  * License along with this library; if not, write to the Free Software
     18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
     19  */
     20 #include "config.h"
     21 #ifdef _WIN32
     22 #include <windows.h>
     23 #include <winioctl.h>
     24 #else
     25 #include <sys/types.h>
     26 #include <sys/mman.h>
     27 #include <sys/ioctl.h>
     28 #endif
     29 #ifdef CONFIG_SOLARIS
     30 #include <sys/ioccom.h>
     31 #endif
     32 #include <stdlib.h>
     33 #include <stdio.h>
     34 #include <stdarg.h>
     35 #include <string.h>
     36 #include <errno.h>
     37 #include <unistd.h>
     38 #include <inttypes.h>
     39 
     40 #include "cpu.h"
     41 #include "exec-all.h"
     42 #include "qemu-common.h"
     43 
     44 #ifdef CONFIG_KQEMU
     45 
     /* Compile-time switches for this file.  DEBUG is force-enabled here, which
        makes the LOG_INT helpers below forward to the QEMU log. */
     #define DEBUG
     /* #define PROFILE */

     #if defined(DEBUG)
     /* printf-style message, forwarded to qemu_log_mask() with CPU_LOG_INT. */
     #define LOG_INT(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
     /* CPU state dump, forwarded to log_cpu_state_mask() with CPU_LOG_INT. */
     #define LOG_INT_STATE(env) log_cpu_state_mask(CPU_LOG_INT, (env), 0)
     #else
     /* Without DEBUG both helpers expand to an empty statement. */
     #define LOG_INT(...) do { } while (0)
     #define LOG_INT_STATE(env) do { } while (0)
     #endif
     57 
     58 #include <unistd.h>
     59 #include <fcntl.h>
     60 #include "kqemu.h"
     61 
     /* Forward declaration: kqemu_init() calls qpi_init() before any
        definition of it is visible. */
     static void qpi_init(void);

     /*
      * Host-specific view of the kqemu device:
      *   KQEMU_DEVICE      path handed to CreateFile()/open()
      *   KQEMU_INVALID_FD  value of kqemu_fd while the device is not open
      *   kqemu_closefd()   releases an open handle/descriptor
      *   kqemu_fd          the handle/descriptor itself
      */
     #ifdef _WIN32
     #define KQEMU_DEVICE "\\\\.\\kqemu"
     #define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
     #define kqemu_closefd(x) CloseHandle(x)
     HANDLE kqemu_fd = KQEMU_INVALID_FD;
     #else
     #define KQEMU_DEVICE "/dev/kqemu"
     #define KQEMU_INVALID_FD -1
     #define kqemu_closefd(x) close(x)
     int kqemu_fd = KQEMU_INVALID_FD;
     #endif
     79 
     80 /* 0 = not allowed
     81    1 = user kqemu
     82    2 = kernel kqemu
     83 */
     84 int kqemu_allowed = 1;
     85 uint64_t *pages_to_flush;
     86 unsigned int nb_pages_to_flush;
     87 uint64_t *ram_pages_to_update;
     88 unsigned int nb_ram_pages_to_update;
     89 uint64_t *modified_ram_pages;
     90 unsigned int nb_modified_ram_pages;
     91 uint8_t *modified_ram_pages_table;
     92 int qpi_io_memory;
     93 uint32_t kqemu_comm_base; /* physical address of the QPI communication page */
     94 ram_addr_t kqemu_phys_ram_size;
     95 uint8_t *kqemu_phys_ram_base;
     96 
     /*
      * Execute the CPUID instruction with `index` loaded in EAX and store the
      * resulting EAX/EBX/ECX/EDX values in the lvalues eax, ebx, ecx and edx.
      */
     #define cpuid(index, eax, ebx, ecx, edx)                            \
         __asm__ volatile ("cpuid"                                       \
                           : "=a" (eax), "=b" (ebx),                     \
                             "=c" (ecx), "=d" (edx)                      \
                           : "0" (index))
    101 
    /*
     * Report whether the host CPU implements the CPUID instruction.
     *
     * Returns non-zero when CPUID can be used, 0 otherwise.
     */
    static int is_cpuid_supported(void)
    {
    #ifdef __x86_64__
        /* 64-bit builds report CPUID as available unconditionally. */
        return 1;
    #else
        int flags_after, flags_before;

        /* Read EFLAGS, keep a copy, toggle the bit selected by 0x00200000
           (the ID flag), write the result back and read EFLAGS again.  The
           two values differ only if the CPU let the bit change. */
        __asm__ volatile ("pushf\n"
                          "popl %0\n"
                          "movl %0, %1\n"
                          "xorl $0x00200000, %0\n"
                          "pushl %0\n"
                          "popf\n"
                          "pushf\n"
                          "popl %0\n"
                          : "=a" (flags_after), "=d" (flags_before)
                          :
                          : "cc");
        return flags_after != flags_before;
    #endif
    }
    125 
    126 static void kqemu_update_cpuid(CPUState *env)
    127 {
    128     int critical_features_mask, features, ext_features, ext_features_mask;
    129     uint32_t eax, ebx, ecx, edx;
    130 
    131     /* the following features are kept identical on the host and
    132        target cpus because they are important for user code. Strictly
    133        speaking, only SSE really matters because the OS must support
    134        it if the user code uses it. */
    135     critical_features_mask =
    136         CPUID_CMOV | CPUID_CX8 |
    137         CPUID_FXSR | CPUID_MMX | CPUID_SSE |
    138         CPUID_SSE2 | CPUID_SEP;
    139     ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
    140     if (!is_cpuid_supported()) {
    141         features = 0;
    142         ext_features = 0;
    143     } else {
    144         cpuid(1, eax, ebx, ecx, edx);
    145         features = edx;
    146         ext_features = ecx;
    147     }
    148 #ifdef __x86_64__
    149     /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
    150        compatibility mode, so in order to have the best performances
    151        it is better not to use it */
    152     features &= ~CPUID_SEP;
    153 #endif
    154     env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
    155         (features & critical_features_mask);
    156     env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
    157         (ext_features & ext_features_mask);
    158     /* XXX: we could update more of the target CPUID state so that the
    159        non accelerated code sees exactly the same CPU features as the
    160        accelerated code */
    161 }
    162 
    163 int kqemu_init(CPUState *env)
    164 {
    165     struct kqemu_init kinit;
    166     int ret, version;
    167 #ifdef _WIN32
    168     DWORD temp;
    169 #endif
    170 
    171     if (!kqemu_allowed)
    172         return -1;
    173 
    174 #ifdef _WIN32
    175     kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
    176                           FILE_SHARE_READ | FILE_SHARE_WRITE,
    177                           NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
    178                           NULL);
    179     if (kqemu_fd == KQEMU_INVALID_FD) {
    180         fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %lu\n",
    181                 KQEMU_DEVICE, GetLastError());
    182         return -1;
    183     }
    184 #else
    185     kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
    186     if (kqemu_fd == KQEMU_INVALID_FD) {
    187         fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
    188                 KQEMU_DEVICE, strerror(errno));
    189         return -1;
    190     }
    191 #endif
    192     version = 0;
    193 #ifdef _WIN32
    194     DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
    195                     &version, sizeof(version), &temp, NULL);
    196 #else
    197     ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
    198 #endif
    199     if (version != KQEMU_VERSION) {
    200         fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
    201                 version, KQEMU_VERSION);
    202         goto fail;
    203     }
    204 
    205     pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
    206                                   sizeof(uint64_t));
    207     if (!pages_to_flush)
    208         goto fail;
    209 
    210     ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
    211                                        sizeof(uint64_t));
    212     if (!ram_pages_to_update)
    213         goto fail;
    214 
    215     modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
    216                                       sizeof(uint64_t));
    217     if (!modified_ram_pages)
    218         goto fail;
    219     modified_ram_pages_table =
    220         qemu_mallocz(kqemu_phys_ram_size >> TARGET_PAGE_BITS);
    221     if (!modified_ram_pages_table)
    222         goto fail;
    223 
    224     memset(&kinit, 0, sizeof(kinit)); /* set the paddings to zero */
    225     kinit.ram_base = kqemu_phys_ram_base;
    226     kinit.ram_size = kqemu_phys_ram_size;
    227     kinit.ram_dirty = phys_ram_dirty;
    228     kinit.pages_to_flush = pages_to_flush;
    229     kinit.ram_pages_to_update = ram_pages_to_update;
    230     kinit.modified_ram_pages = modified_ram_pages;
    231 #ifdef _WIN32
    232     ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &kinit, sizeof(kinit),
    233                           NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
    234 #else
    235     ret = ioctl(kqemu_fd, KQEMU_INIT, &kinit);
    236 #endif
    237     if (ret < 0) {
    238         fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
    239     fail:
    240         kqemu_closefd(kqemu_fd);
    241         kqemu_fd = KQEMU_INVALID_FD;
    242         return -1;
    243     }
    244     kqemu_update_cpuid(env);
    245     env->kqemu_enabled = kqemu_allowed;
    246     nb_pages_to_flush = 0;
    247     nb_ram_pages_to_update = 0;
    248 
    249     qpi_init();
    250     return 0;
    251 }
    252 
    253 void kqemu_flush_page(CPUState *env, target_ulong addr)
    254 {
    255     LOG_INT("kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
    256     if (nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH)
    257         nb_pages_to_flush = KQEMU_FLUSH_ALL;
    258     else
    259         pages_to_flush[nb_pages_to_flush++] = addr;
    260 }
    261 
    262 void kqemu_flush(CPUState *env, int global)
    263 {
    264     LOG_INT("kqemu_flush:\n");
    265     nb_pages_to_flush = KQEMU_FLUSH_ALL;
    266 }
    267 
    268 void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
    269 {
    270     LOG_INT("kqemu_set_notdirty: addr=%08lx\n",
    271                 (unsigned long)ram_addr);
    272     /* we only track transitions to dirty state */
    273     if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff)
    274         return;
    275     if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE)
    276         nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
    277     else
    278         ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
    279 }
    280 
    281 static void kqemu_reset_modified_ram_pages(void)
    282 {
    283     int i;
    284     unsigned long page_index;
    285 
    286     for(i = 0; i < nb_modified_ram_pages; i++) {
    287         page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
    288         modified_ram_pages_table[page_index] = 0;
    289     }
    290     nb_modified_ram_pages = 0;
    291 }
    292 
    293 void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
    294 {
    295     unsigned long page_index;
    296     int ret;
    297 #ifdef _WIN32
    298     DWORD temp;
    299 #endif
    300 
    301     page_index = ram_addr >> TARGET_PAGE_BITS;
    302     if (!modified_ram_pages_table[page_index]) {
    303 #if 0
    304         printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
    305 #endif
    306         modified_ram_pages_table[page_index] = 1;
    307         modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
    308         if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
    309             /* flush */
    310 #ifdef _WIN32
    311             ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
    312                                   &nb_modified_ram_pages,
    313                                   sizeof(nb_modified_ram_pages),
    314                                   NULL, 0, &temp, NULL);
    315 #else
    316             ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
    317                         &nb_modified_ram_pages);
    318 #endif
    319             kqemu_reset_modified_ram_pages();
    320         }
    321     }
    322 }
    323 
    324 void kqemu_set_phys_mem(uint64_t start_addr, ram_addr_t size,
    325                         ram_addr_t phys_offset)
    326 {
    327     struct kqemu_phys_mem kphys_mem1, *kphys_mem = &kphys_mem1;
    328     uint64_t end;
    329     int ret, io_index;
    330 
    331     end = (start_addr + size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
    332     start_addr &= TARGET_PAGE_MASK;
    333     kphys_mem->phys_addr = start_addr;
    334     kphys_mem->size = end - start_addr;
    335     kphys_mem->ram_addr = phys_offset & TARGET_PAGE_MASK;
    336     io_index = phys_offset & ~TARGET_PAGE_MASK;
    337     switch(io_index) {
    338     case IO_MEM_RAM:
    339         kphys_mem->io_index = KQEMU_IO_MEM_RAM;
    340         break;
    341     case IO_MEM_ROM:
    342         kphys_mem->io_index = KQEMU_IO_MEM_ROM;
    343         break;
    344     default:
    345         if (qpi_io_memory == io_index) {
    346             kphys_mem->io_index = KQEMU_IO_MEM_COMM;
    347         } else {
    348             kphys_mem->io_index = KQEMU_IO_MEM_UNASSIGNED;
    349         }
    350         break;
    351     }
    352 #ifdef _WIN32
    353     {
    354         DWORD temp;
    355         ret = DeviceIoControl(kqemu_fd, KQEMU_SET_PHYS_MEM,
    356                               kphys_mem, sizeof(*kphys_mem),
    357                               NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
    358     }
    359 #else
    360     ret = ioctl(kqemu_fd, KQEMU_SET_PHYS_MEM, kphys_mem);
    361 #endif
    362     if (ret < 0) {
    363         fprintf(stderr, "kqemu: KQEMU_SET_PHYS_PAGE error=%d: start_addr=0x%016" PRIx64 " size=0x%08lx phys_offset=0x%08lx\n",
    364                 ret, start_addr,
    365                 (unsigned long)size, (unsigned long)phys_offset);
    366     }
    367 }
    368 
    /*
     * Memory image exchanged with the host FPU by the fsave/frstor helpers
     * in this file.  The dummyN members pad each 16-bit register to 32 bits.
     */
    struct fpstate {
        uint16_t fpuc;              /* control word */
        uint16_t dummy1;
        uint16_t fpus;              /* status word */
        uint16_t dummy2;
        uint16_t fptag;             /* tag word, 2 bits per register */
        uint16_t dummy3;

        uint32_t fpip;
        uint32_t fpcs;
        uint32_t fpoo;
        uint32_t fpos;
        uint8_t fpregs1[8 * 10];    /* 8 registers, 10 bytes each */
    };

    /*
     * Memory image exchanged with the host FPU by the fxsave/fxrstor helpers
     * in this file.
     */
    struct fpxstate {
        uint16_t fpuc;              /* control word */
        uint16_t fpus;              /* status word */
        uint16_t fptag;             /* tag bits, 1 bit per register */
        uint16_t fop;
        uint32_t fpuip;
        uint16_t cs_sel;
        uint16_t dummy0;
        uint32_t fpudp;
        uint16_t ds_sel;
        uint16_t dummy1;
        uint32_t mxcsr;
        uint32_t mxcsr_mask;
        uint8_t fpregs1[8 * 16];    /* 8 registers in 16-byte slots */
        uint8_t xmm_regs[16 * 16];  /* 16 registers, 16 bytes each */
        uint8_t dummy2[96];
    };

    /* Single 16-byte aligned buffer shared by the fxsave/fxrstor helpers. */
    static struct fpxstate fpx1 __attribute__((aligned(16)));
    403 
    404 static void restore_native_fp_frstor(CPUState *env)
    405 {
    406     int fptag, i, j;
    407     struct fpstate fp1, *fp = &fp1;
    408 
    409     fp->fpuc = env->fpuc;
    410     fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    411     fptag = 0;
    412     for (i=7; i>=0; i--) {
    413 	fptag <<= 2;
    414 	if (env->fptags[i]) {
    415             fptag |= 3;
    416         } else {
    417             /* the FPU automatically computes it */
    418         }
    419     }
    420     fp->fptag = fptag;
    421     j = env->fpstt;
    422     for(i = 0;i < 8; i++) {
    423         memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
    424         j = (j + 1) & 7;
    425     }
    426     asm volatile ("frstor %0" : "=m" (*fp));
    427 }
    428 
    429 static void save_native_fp_fsave(CPUState *env)
    430 {
    431     int fptag, i, j;
    432     uint16_t fpuc;
    433     struct fpstate fp1, *fp = &fp1;
    434 
    435     asm volatile ("fsave %0" : : "m" (*fp));
    436     env->fpuc = fp->fpuc;
    437     env->fpstt = (fp->fpus >> 11) & 7;
    438     env->fpus = fp->fpus & ~0x3800;
    439     fptag = fp->fptag;
    440     for(i = 0;i < 8; i++) {
    441         env->fptags[i] = ((fptag & 3) == 3);
    442         fptag >>= 2;
    443     }
    444     j = env->fpstt;
    445     for(i = 0;i < 8; i++) {
    446         memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
    447         j = (j + 1) & 7;
    448     }
    449     /* we must restore the default rounding state */
    450     fpuc = 0x037f | (env->fpuc & (3 << 10));
    451     asm volatile("fldcw %0" : : "m" (fpuc));
    452 }
    453 
    454 static void restore_native_fp_fxrstor(CPUState *env)
    455 {
    456     struct fpxstate *fp = &fpx1;
    457     int i, j, fptag;
    458 
    459     fp->fpuc = env->fpuc;
    460     fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    461     fptag = 0;
    462     for(i = 0; i < 8; i++)
    463         fptag |= (env->fptags[i] << i);
    464     fp->fptag = fptag ^ 0xff;
    465 
    466     j = env->fpstt;
    467     for(i = 0;i < 8; i++) {
    468         memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
    469         j = (j + 1) & 7;
    470     }
    471     if (env->cpuid_features & CPUID_SSE) {
    472         fp->mxcsr = env->mxcsr;
    473         /* XXX: check if DAZ is not available */
    474         fp->mxcsr_mask = 0xffff;
    475         memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
    476     }
    477     asm volatile ("fxrstor %0" : "=m" (*fp));
    478 }
    479 
    480 static void save_native_fp_fxsave(CPUState *env)
    481 {
    482     struct fpxstate *fp = &fpx1;
    483     int fptag, i, j;
    484     uint16_t fpuc;
    485 
    486     asm volatile ("fxsave %0" : : "m" (*fp));
    487     env->fpuc = fp->fpuc;
    488     env->fpstt = (fp->fpus >> 11) & 7;
    489     env->fpus = fp->fpus & ~0x3800;
    490     fptag = fp->fptag ^ 0xff;
    491     for(i = 0;i < 8; i++) {
    492         env->fptags[i] = (fptag >> i) & 1;
    493     }
    494     j = env->fpstt;
    495     for(i = 0;i < 8; i++) {
    496         memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
    497         j = (j + 1) & 7;
    498     }
    499     if (env->cpuid_features & CPUID_SSE) {
    500         env->mxcsr = fp->mxcsr;
    501         memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
    502     }
    503 
    504     /* we must restore the default rounding state */
    505     asm volatile ("fninit");
    506     fpuc = 0x037f | (env->fpuc & (3 << 10));
    507     asm volatile("fldcw %0" : : "m" (fpuc));
    508 }
    509 
    510 static int do_syscall(CPUState *env,
    511                       struct kqemu_cpu_state *kenv)
    512 {
    513     int selector;
    514 
    515     selector = (env->star >> 32) & 0xffff;
    516 #ifdef TARGET_X86_64
    517     if (env->hflags & HF_LMA_MASK) {
    518         int code64;
    519 
    520         env->regs[R_ECX] = kenv->next_eip;
    521         env->regs[11] = env->eflags;
    522 
    523         code64 = env->hflags & HF_CS64_MASK;
    524 
    525         cpu_x86_set_cpl(env, 0);
    526         cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
    527                                0, 0xffffffff,
    528                                DESC_G_MASK | DESC_P_MASK |
    529                                DESC_S_MASK |
    530                                DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
    531         cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
    532                                0, 0xffffffff,
    533                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
    534                                DESC_S_MASK |
    535                                DESC_W_MASK | DESC_A_MASK);
    536         env->eflags &= ~env->fmask;
    537         if (code64)
    538             env->eip = env->lstar;
    539         else
    540             env->eip = env->cstar;
    541     } else
    542 #endif
    543     {
    544         env->regs[R_ECX] = (uint32_t)kenv->next_eip;
    545 
    546         cpu_x86_set_cpl(env, 0);
    547         cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
    548                            0, 0xffffffff,
    549                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
    550                                DESC_S_MASK |
    551                                DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
    552         cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
    553                                0, 0xffffffff,
    554                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
    555                                DESC_S_MASK |
    556                                DESC_W_MASK | DESC_A_MASK);
    557         env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
    558         env->eip = (uint32_t)env->star;
    559     }
    560     return 2;
    561 }
    562 
    563 #ifdef CONFIG_PROFILER
    564 
    /* Granularity of a PC sample and size of the hash table indexing them. */
    #define PC_REC_SIZE 1
    #define PC_REC_HASH_BITS 16
    #define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)

    /* One sampled program counter and the number of times it was recorded;
       records sharing a hash bucket are chained through `next`. */
    typedef struct PCRecord {
        unsigned long pc;
        int64_t count;
        struct PCRecord *next;
    } PCRecord;

    static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
    static int nb_pc_records;

    /*
     * Count one occurrence of `pc`: increment its record if one exists,
     * otherwise append a new record (count 1) to its hash bucket.
     * If no memory is available for a new record the sample is dropped.
     */
    static void kqemu_record_pc(unsigned long pc)
    {
        unsigned long h;
        PCRecord **pr, *r;

        h = pc / PC_REC_SIZE;
        h = h ^ (h >> PC_REC_HASH_BITS);
        h &= (PC_REC_HASH_SIZE - 1);
        for (pr = &pc_rec_hash[h]; (r = *pr) != NULL; pr = &r->next) {
            if (r->pc == pc) {
                r->count++;
                return;
            }
        }
        r = malloc(sizeof(*r));
        if (r == NULL) {
            /* profiling is best effort: lose the sample rather than crash */
            return;
        }
        r->count = 1;
        r->pc = pc;
        r->next = NULL;
        *pr = r;
        nb_pc_records++;
    }

    /* qsort() comparator over PCRecord pointers: descending by count. */
    static int pc_rec_cmp(const void *p1, const void *p2)
    {
        const PCRecord *r1 = *(PCRecord * const *)p1;
        const PCRecord *r2 = *(PCRecord * const *)p2;

        if (r1->count < r2->count)
            return 1;
        else if (r1->count == r2->count)
            return 0;
        else
            return -1;
    }

    /* Free every record and leave the hash table empty. */
    static void kqemu_record_flush(void)
    {
        PCRecord *r, *r_next;
        int h;

        for (h = 0; h < PC_REC_HASH_SIZE; h++) {
            for (r = pc_rec_hash[h]; r != NULL; r = r_next) {
                r_next = r->next;
                free(r);
            }
            pc_rec_hash[h] = NULL;
        }
        nb_pc_records = 0;
    }

    /*
     * Write the recorded PCs to /tmp/kqemu.stats, most frequent first, each
     * with its count, its share of the total and the cumulative share, then
     * discard all records.
     *
     * The process exits with status 1 if the file cannot be opened.  If the
     * temporary sort array cannot be allocated, the error is reported and the
     * records are kept.
     */
    void kqemu_record_dump(void)
    {
        PCRecord **pr, *r;
        int i, h;
        FILE *f;
        int64_t total, sum;

        /* ask for at least one slot so that an empty table does not turn
           into malloc(0), whose result may legitimately be NULL */
        pr = malloc(sizeof(*pr) * (nb_pc_records > 0 ? nb_pc_records : 1));
        if (pr == NULL) {
            perror("kqemu_record_dump");
            return;
        }
        i = 0;
        total = 0;
        for (h = 0; h < PC_REC_HASH_SIZE; h++) {
            for (r = pc_rec_hash[h]; r != NULL; r = r->next) {
                pr[i++] = r;
                total += r->count;
            }
        }
        qsort(pr, nb_pc_records, sizeof(PCRecord *), pc_rec_cmp);

        f = fopen("/tmp/kqemu.stats", "w");
        if (!f) {
            perror("/tmp/kqemu.stats");
            exit(1);
        }
        fprintf(f, "total: %" PRId64 "\n", total);
        sum = 0;
        for (i = 0; i < nb_pc_records; i++) {
            r = pr[i];
            sum += r->count;
            fprintf(f, "%08lx: %" PRId64 " %0.2f%% %0.2f%%\n",
                    r->pc,
                    r->count,
                    (double)r->count / (double)total * 100.0,
                    (double)sum / (double)total * 100.0);
        }
        fclose(f);
        free(pr);

        kqemu_record_flush();
    }
    671 #endif
    672 
    673 static inline void kqemu_load_seg(struct kqemu_segment_cache *ksc,
    674                                   const SegmentCache *sc)
    675 {
    676     ksc->selector = sc->selector;
    677     ksc->flags = sc->flags;
    678     ksc->limit = sc->limit;
    679     ksc->base = sc->base;
    680 }
    681 
    682 static inline void kqemu_save_seg(SegmentCache *sc,
    683                                   const struct kqemu_segment_cache *ksc)
    684 {
    685     sc->selector = ksc->selector;
    686     sc->flags = ksc->flags;
    687     sc->limit = ksc->limit;
    688     sc->base = ksc->base;
    689 }
    690 
    691 int kqemu_cpu_exec(CPUState *env)
    692 {
    693     struct kqemu_cpu_state kcpu_state, *kenv = &kcpu_state;
    694     int ret, cpl, i;
    695 #ifdef CONFIG_PROFILER
    696     int64_t ti;
    697 #endif
    698 #ifdef _WIN32
    699     DWORD temp;
    700 #endif
    701 
    702 #ifdef CONFIG_PROFILER
    703     ti = profile_getclock();
    704 #endif
    705     LOG_INT("kqemu: cpu_exec: enter\n");
    706     LOG_INT_STATE(env);
    707     for(i = 0; i < CPU_NB_REGS; i++)
    708         kenv->regs[i] = env->regs[i];
    709     kenv->eip = env->eip;
    710     kenv->eflags = env->eflags;
    711     for(i = 0; i < 6; i++)
    712         kqemu_load_seg(&kenv->segs[i], &env->segs[i]);
    713     kqemu_load_seg(&kenv->ldt, &env->ldt);
    714     kqemu_load_seg(&kenv->tr, &env->tr);
    715     kqemu_load_seg(&kenv->gdt, &env->gdt);
    716     kqemu_load_seg(&kenv->idt, &env->idt);
    717     kenv->cr0 = env->cr[0];
    718     kenv->cr2 = env->cr[2];
    719     kenv->cr3 = env->cr[3];
    720     kenv->cr4 = env->cr[4];
    721     kenv->a20_mask = env->a20_mask;
    722     kenv->efer = env->efer;
    723     kenv->tsc_offset = 0;
    724     kenv->star = env->star;
    725     kenv->sysenter_cs = env->sysenter_cs;
    726     kenv->sysenter_esp = env->sysenter_esp;
    727     kenv->sysenter_eip = env->sysenter_eip;
    728 #ifdef TARGET_X86_64
    729     kenv->lstar = env->lstar;
    730     kenv->cstar = env->cstar;
    731     kenv->fmask = env->fmask;
    732     kenv->kernelgsbase = env->kernelgsbase;
    733 #endif
    734     if (env->dr[7] & 0xff) {
    735         kenv->dr7 = env->dr[7];
    736         kenv->dr0 = env->dr[0];
    737         kenv->dr1 = env->dr[1];
    738         kenv->dr2 = env->dr[2];
    739         kenv->dr3 = env->dr[3];
    740     } else {
    741         kenv->dr7 = 0;
    742     }
    743     kenv->dr6 = env->dr[6];
    744     cpl = (env->hflags & HF_CPL_MASK);
    745     kenv->cpl = cpl;
    746     kenv->nb_pages_to_flush = nb_pages_to_flush;
    747     kenv->user_only = (env->kqemu_enabled == 1);
    748     kenv->nb_ram_pages_to_update = nb_ram_pages_to_update;
    749     nb_ram_pages_to_update = 0;
    750     kenv->nb_modified_ram_pages = nb_modified_ram_pages;
    751 
    752     kqemu_reset_modified_ram_pages();
    753 
    754     if (env->cpuid_features & CPUID_FXSR)
    755         restore_native_fp_fxrstor(env);
    756     else
    757         restore_native_fp_frstor(env);
    758 
    759 #ifdef _WIN32
    760     if (DeviceIoControl(kqemu_fd, KQEMU_EXEC,
    761                         kenv, sizeof(struct kqemu_cpu_state),
    762                         kenv, sizeof(struct kqemu_cpu_state),
    763                         &temp, NULL)) {
    764         ret = kenv->retval;
    765     } else {
    766         ret = -1;
    767     }
    768 #else
    769     ioctl(kqemu_fd, KQEMU_EXEC, kenv);
    770     ret = kenv->retval;
    771 #endif
    772     if (env->cpuid_features & CPUID_FXSR)
    773         save_native_fp_fxsave(env);
    774     else
    775         save_native_fp_fsave(env);
    776 
    777     for(i = 0; i < CPU_NB_REGS; i++)
    778         env->regs[i] = kenv->regs[i];
    779     env->eip = kenv->eip;
    780     env->eflags = kenv->eflags;
    781     for(i = 0; i < 6; i++)
    782         kqemu_save_seg(&env->segs[i], &kenv->segs[i]);
    783     cpu_x86_set_cpl(env, kenv->cpl);
    784     kqemu_save_seg(&env->ldt, &kenv->ldt);
    785     env->cr[0] = kenv->cr0;
    786     env->cr[4] = kenv->cr4;
    787     env->cr[3] = kenv->cr3;
    788     env->cr[2] = kenv->cr2;
    789     env->dr[6] = kenv->dr6;
    790 #ifdef TARGET_X86_64
    791     env->kernelgsbase = kenv->kernelgsbase;
    792 #endif
    793 
    794     /* flush pages as indicated by kqemu */
    795     if (kenv->nb_pages_to_flush >= KQEMU_FLUSH_ALL) {
    796         tlb_flush(env, 1);
    797     } else {
    798         for(i = 0; i < kenv->nb_pages_to_flush; i++) {
    799             tlb_flush_page(env, pages_to_flush[i]);
    800         }
    801     }
    802     nb_pages_to_flush = 0;
    803 
    804 #ifdef CONFIG_PROFILER
    805     kqemu_time += profile_getclock() - ti;
    806     kqemu_exec_count++;
    807 #endif
    808 
    809     if (kenv->nb_ram_pages_to_update > 0) {
    810         cpu_tlb_update_dirty(env);
    811     }
    812 
    813     if (kenv->nb_modified_ram_pages > 0) {
    814         for(i = 0; i < kenv->nb_modified_ram_pages; i++) {
    815             unsigned long addr;
    816             addr = modified_ram_pages[i];
    817             tb_invalidate_phys_page_range(addr, addr + TARGET_PAGE_SIZE, 0);
    818         }
    819     }
    820 
    821     /* restore the hidden flags */
    822     {
    823         unsigned int new_hflags;
    824 #ifdef TARGET_X86_64
    825         if ((env->hflags & HF_LMA_MASK) &&
    826             (env->segs[R_CS].flags & DESC_L_MASK)) {
    827             /* long mode */
    828             new_hflags = HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    829         } else
    830 #endif
    831         {
    832             /* legacy / compatibility case */
    833             new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
    834                 >> (DESC_B_SHIFT - HF_CS32_SHIFT);
    835             new_hflags |= (env->segs[R_SS].flags & DESC_B_MASK)
    836                 >> (DESC_B_SHIFT - HF_SS32_SHIFT);
    837             if (!(env->cr[0] & CR0_PE_MASK) ||
    838                    (env->eflags & VM_MASK) ||
    839                    !(env->hflags & HF_CS32_MASK)) {
    840                 /* XXX: try to avoid this test. The problem comes from the
    841                    fact that is real mode or vm86 mode we only modify the
    842                    'base' and 'selector' fields of the segment cache to go
    843                    faster. A solution may be to force addseg to one in
    844                    translate-i386.c. */
    845                 new_hflags |= HF_ADDSEG_MASK;
    846             } else {
    847                 new_hflags |= ((env->segs[R_DS].base |
    848                                 env->segs[R_ES].base |
    849                                 env->segs[R_SS].base) != 0) <<
    850                     HF_ADDSEG_SHIFT;
    851             }
    852         }
    853         env->hflags = (env->hflags &
    854            ~(HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK)) |
    855             new_hflags;
    856     }
    857     /* update FPU flags */
    858     env->hflags = (env->hflags & ~(HF_MP_MASK | HF_EM_MASK | HF_TS_MASK)) |
    859         ((env->cr[0] << (HF_MP_SHIFT - 1)) & (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK));
    860     if (env->cr[4] & CR4_OSFXSR_MASK)
    861         env->hflags |= HF_OSFXSR_MASK;
    862     else
    863         env->hflags &= ~HF_OSFXSR_MASK;
    864 
    865     LOG_INT("kqemu: kqemu_cpu_exec: ret=0x%x\n", ret);
    866     if (ret == KQEMU_RET_SYSCALL) {
    867         /* syscall instruction */
    868         return do_syscall(env, kenv);
    869     } else
    870     if ((ret & 0xff00) == KQEMU_RET_INT) {
    871         env->exception_index = ret & 0xff;
    872         env->error_code = 0;
    873         env->exception_is_int = 1;
    874         env->exception_next_eip = kenv->next_eip;
    875 #ifdef CONFIG_PROFILER
    876         kqemu_ret_int_count++;
    877 #endif
    878         LOG_INT("kqemu: interrupt v=%02x:\n", env->exception_index);
    879         LOG_INT_STATE(env);
    880         return 1;
    881     } else if ((ret & 0xff00) == KQEMU_RET_EXCEPTION) {
    882         env->exception_index = ret & 0xff;
    883         env->error_code = kenv->error_code;
    884         env->exception_is_int = 0;
    885         env->exception_next_eip = 0;
    886 #ifdef CONFIG_PROFILER
    887         kqemu_ret_excp_count++;
    888 #endif
    889         LOG_INT("kqemu: exception v=%02x e=%04x:\n",
    890                     env->exception_index, env->error_code);
    891         LOG_INT_STATE(env);
    892         return 1;
    893     } else if (ret == KQEMU_RET_INTR) {
    894 #ifdef CONFIG_PROFILER
    895         kqemu_ret_intr_count++;
    896 #endif
    897         LOG_INT_STATE(env);
    898         return 0;
    899     } else if (ret == KQEMU_RET_SOFTMMU) {
    900 #ifdef CONFIG_PROFILER
    901         {
    902             unsigned long pc = env->eip + env->segs[R_CS].base;
    903             kqemu_record_pc(pc);
    904         }
    905 #endif
    906         LOG_INT_STATE(env);
    907         return 2;
    908     } else {
    909         cpu_dump_state(env, stderr, fprintf, 0);
    910         fprintf(stderr, "Unsupported return value: 0x%x\n", ret);
    911         exit(1);
    912     }
    913     return 0;
    914 }
    915 
    916 void kqemu_cpu_interrupt(CPUState *env)
    917 {
    918 #if defined(_WIN32)
    919     /* cancelling the I/O request causes KQEMU to finish executing the
    920        current block and successfully returning. */
    921     CancelIo(kqemu_fd);
    922 #endif
    923 }
    924 
/*
   QEMU paravirtualization interface. The current interface only
   allows modifying the IF and IOPL flags when running in
   kqemu.

   At this point it is not very satisfactory. It is left in for
   reference because it adds little complexity.
*/
    933 
/* Physical address of the QPI communication page; this is the same
   address at which qpi_init() registers its 0x1000-byte I/O region. */
#define QPI_COMM_PAGE_PHYS_ADDR 0xff000000
    935 
    936 static uint32_t qpi_mem_readb(void *opaque, target_phys_addr_t addr)
    937 {
    938     return 0;
    939 }
    940 
    941 static uint32_t qpi_mem_readw(void *opaque, target_phys_addr_t addr)
    942 {
    943     return 0;
    944 }
    945 
    946 static void qpi_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
    947 {
    948 }
    949 
    950 static void qpi_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
    951 {
    952 }
    953 
    954 static uint32_t qpi_mem_readl(void *opaque, target_phys_addr_t addr)
    955 {
    956     CPUState *env;
    957 
    958     env = cpu_single_env;
    959     if (!env)
    960         return 0;
    961     return env->eflags & (IF_MASK | IOPL_MASK);
    962 }
    963 
    964 /* Note: after writing to this address, the guest code must make sure
    965    it is exiting the current TB. pushf/popf can be used for that
    966    purpose. */
    967 static void qpi_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
    968 {
    969     CPUState *env;
    970 
    971     env = cpu_single_env;
    972     if (!env)
    973         return;
    974     env->eflags = (env->eflags & ~(IF_MASK | IOPL_MASK)) |
    975         (val & (IF_MASK | IOPL_MASK));
    976 }
    977 
    978 static CPUReadMemoryFunc *qpi_mem_read[3] = {
    979     qpi_mem_readb,
    980     qpi_mem_readw,
    981     qpi_mem_readl,
    982 };
    983 
    984 static CPUWriteMemoryFunc *qpi_mem_write[3] = {
    985     qpi_mem_writeb,
    986     qpi_mem_writew,
    987     qpi_mem_writel,
    988 };
    989 
    990 static void qpi_init(void)
    991 {
    992     kqemu_comm_base = 0xff000000 | 1;
    993     qpi_io_memory = cpu_register_io_memory(
    994                                            qpi_mem_read,
    995                                            qpi_mem_write, NULL);
    996     cpu_register_physical_memory(kqemu_comm_base & ~0xfff,
    997                                  0x1000, qpi_io_memory);
    998 }
    999 #endif
   1000