(linenum→info "unix/slp.c:2238")

qemu/0.9.1/kqemu.c

    1: /*
    2:  *  KQEMU support
    3:  *
    4:  *  Copyright (c) 2005 Fabrice Bellard
    5:  *
    6:  * This library is free software; you can redistribute it and/or
    7:  * modify it under the terms of the GNU Lesser General Public
    8:  * License as published by the Free Software Foundation; either
    9:  * version 2 of the License, or (at your option) any later version.
   10:  *
   11:  * This library is distributed in the hope that it will be useful,
   12:  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   13:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14:  * Lesser General Public License for more details.
   15:  *
   16:  * You should have received a copy of the GNU Lesser General Public
   17:  * License along with this library; if not, write to the Free Software
   18:  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   19:  */
   20: #include "config.h"
   21: #ifdef _WIN32
   22: #define WIN32_LEAN_AND_MEAN
   23: #include <windows.h>
   24: #include <winioctl.h>
   25: #else
   26: #include <sys/types.h>
   27: #include <sys/mman.h>
   28: #include <sys/ioctl.h>
   29: #endif
   30: #ifdef HOST_SOLARIS
   31: #include <sys/ioccom.h>
   32: #endif
   33: #include <stdlib.h>
   34: #include <stdio.h>
   35: #include <stdarg.h>
   36: #include <string.h>
   37: #include <errno.h>
   38: #include <unistd.h>
   39: #include <inttypes.h>
   40: 
   41: #include "cpu.h"
   42: #include "exec-all.h"
   43: 
   44: #ifdef USE_KQEMU
   45: 
   46: #define DEBUG
   47: //#define PROFILE
   48: 
   49: #include <unistd.h>
   50: #include <fcntl.h>
   51: #include "kqemu.h"
   52: 
   53: /* compatibility stuff */
   54: #ifndef KQEMU_RET_SYSCALL
   55: #define KQEMU_RET_SYSCALL   0x0300 /* syscall insn */
   56: #endif
   57: #ifndef KQEMU_MAX_RAM_PAGES_TO_UPDATE
   58: #define KQEMU_MAX_RAM_PAGES_TO_UPDATE 512
   59: #define KQEMU_RAM_PAGES_UPDATE_ALL (KQEMU_MAX_RAM_PAGES_TO_UPDATE + 1)
   60: #endif
   61: #ifndef KQEMU_MAX_MODIFIED_RAM_PAGES
   62: #define KQEMU_MAX_MODIFIED_RAM_PAGES 512
   63: #endif
   64: 
   65: #ifdef _WIN32
   66: #define KQEMU_DEVICE "\\\\.\\kqemu"
   67: #else
   68: #define KQEMU_DEVICE "/dev/kqemu"
   69: #endif
   70: 
   71: #ifdef _WIN32
   72: #define KQEMU_INVALID_FD INVALID_HANDLE_VALUE
   73: HANDLE kqemu_fd = KQEMU_INVALID_FD;
   74: #define kqemu_closefd(x) CloseHandle(x)
   75: #else
   76: #define KQEMU_INVALID_FD -1
   77: int kqemu_fd = KQEMU_INVALID_FD;
   78: #define kqemu_closefd(x) close(x)
   79: #endif
   80: 
   81: /* 0 = not allowed
   82:    1 = user kqemu
   83:    2 = kernel kqemu
   84: */
   85: int kqemu_allowed = 1;
   86: unsigned long *pages_to_flush;
   87: unsigned int nb_pages_to_flush;
   88: unsigned long *ram_pages_to_update;
   89: unsigned int nb_ram_pages_to_update;
   90: unsigned long *modified_ram_pages;
   91: unsigned int nb_modified_ram_pages;
   92: uint8_t *modified_ram_pages_table;
   93: extern uint32_t **l1_phys_map;
   94: 
   95: #define cpuid(index, eax, ebx, ecx, edx) \
   96:   asm volatile ("cpuid" \
   97:                 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \
   98:                 : "0" (index))
   99: 
  100: #ifdef __x86_64__
  101: static int is_cpuid_supported(void)
  102: {
  103:     return 1;
  104: }
  105: #else
  106: static int is_cpuid_supported(void)
  107: {
  108:     int v0, v1;
  109:     asm volatile ("pushf\n"
  110:                   "popl %0\n"
  111:                   "movl %0, %1\n"
  112:                   "xorl $0x00200000, %0\n"
  113:                   "pushl %0\n"
  114:                   "popf\n"
  115:                   "pushf\n"
  116:                   "popl %0\n"
  117:                   : "=a" (v0), "=d" (v1)
  118:                   :
  119:                   : "cc");
  120:     return (v0 != v1);
  121: }
  122: #endif
  123: 
  124: static void kqemu_update_cpuid(CPUState *env)
  125: {
  126:     int critical_features_mask, features, ext_features, ext_features_mask;
  127:     uint32_t eax, ebx, ecx, edx;
  128: 
  129:     /* the following features are kept identical on the host and
  130:        target cpus because they are important for user code. Strictly
  131:        speaking, only SSE really matters because the OS must support
  132:        it if the user code uses it. */
  133:     critical_features_mask =
  134:         CPUID_CMOV | CPUID_CX8 |
  135:         CPUID_FXSR | CPUID_MMX | CPUID_SSE |
  136:         CPUID_SSE2 | CPUID_SEP;
  137:     ext_features_mask = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR;
  138:     if (!is_cpuid_supported()) {
  139:         features = 0;
  140:         ext_features = 0;
  141:     } else {
  142:         cpuid(1, eax, ebx, ecx, edx);
  143:         features = edx;
  144:         ext_features = ecx;
  145:     }
  146: #ifdef __x86_64__
  147:     /* NOTE: on x86_64 CPUs, SYSENTER is not supported in
  148:        compatibility mode, so in order to have the best performances
  149:        it is better not to use it */
  150:     features &= ~CPUID_SEP;
  151: #endif
  152:     env->cpuid_features = (env->cpuid_features & ~critical_features_mask) |
  153:         (features & critical_features_mask);
  154:     env->cpuid_ext_features = (env->cpuid_ext_features & ~ext_features_mask) |
  155:         (ext_features & ext_features_mask);
  156:     /* XXX: we could update more of the target CPUID state so that the
  157:        non accelerated code sees exactly the same CPU features as the
  158:        accelerated code */
  159: }
  160: 
  161: int kqemu_init(CPUState *env)
  162: {
  163:     struct kqemu_init init;
  164:     int ret, version;
  165: #ifdef _WIN32
  166:     DWORD temp;
  167: #endif
  168: 
  169:     if (!kqemu_allowed)
  170:         return -1;
  171: 
  172: #ifdef _WIN32
  173:     kqemu_fd = CreateFile(KQEMU_DEVICE, GENERIC_WRITE | GENERIC_READ,
  174:                           FILE_SHARE_READ | FILE_SHARE_WRITE,
  175:                           NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL,
  176:                           NULL);
  177: #else
  178:     kqemu_fd = open(KQEMU_DEVICE, O_RDWR);
  179: #endif
  180:     if (kqemu_fd == KQEMU_INVALID_FD) {
  181:         fprintf(stderr, "Could not open '%s' - QEMU acceleration layer not activated: %s\n",
  182:                 KQEMU_DEVICE, strerror(errno));
  183:         return -1;
  184:     }
  185:     version = 0;
  186: #ifdef _WIN32
  187:     DeviceIoControl(kqemu_fd, KQEMU_GET_VERSION, NULL, 0,
  188:                     &version, sizeof(version), &temp, NULL);
  189: #else
  190:     ioctl(kqemu_fd, KQEMU_GET_VERSION, &version);
  191: #endif
  192:     if (version != KQEMU_VERSION) {
  193:         fprintf(stderr, "Version mismatch between kqemu module and qemu (%08x %08x) - disabling kqemu use\n",
  194:                 version, KQEMU_VERSION);
  195:         goto fail;
  196:     }
  197: 
  198:     pages_to_flush = qemu_vmalloc(KQEMU_MAX_PAGES_TO_FLUSH *
  199:                                   sizeof(unsigned long));
  200:     if (!pages_to_flush)
  201:         goto fail;
  202: 
  203:     ram_pages_to_update = qemu_vmalloc(KQEMU_MAX_RAM_PAGES_TO_UPDATE *
  204:                                        sizeof(unsigned long));
  205:     if (!ram_pages_to_update)
  206:         goto fail;
  207: 
  208:     modified_ram_pages = qemu_vmalloc(KQEMU_MAX_MODIFIED_RAM_PAGES *
  209:                                       sizeof(unsigned long));
  210:     if (!modified_ram_pages)
  211:         goto fail;
  212:     modified_ram_pages_table = qemu_mallocz(phys_ram_size >> TARGET_PAGE_BITS);
  213:     if (!modified_ram_pages_table)
  214:         goto fail;
  215: 
  216:     init.ram_base = phys_ram_base;
  217:     init.ram_size = phys_ram_size;
  218:     init.ram_dirty = phys_ram_dirty;
  219:     init.phys_to_ram_map = l1_phys_map;
  220:     init.pages_to_flush = pages_to_flush;
  221: #if KQEMU_VERSION >= 0x010200
  222:     init.ram_pages_to_update = ram_pages_to_update;
  223: #endif
  224: #if KQEMU_VERSION >= 0x010300
  225:     init.modified_ram_pages = modified_ram_pages;
  226: #endif
  227: #ifdef _WIN32
  228:     ret = DeviceIoControl(kqemu_fd, KQEMU_INIT, &init, sizeof(init),
  229:                           NULL, 0, &temp, NULL) == TRUE ? 0 : -1;
  230: #else
  231:     ret = ioctl(kqemu_fd, KQEMU_INIT, &init);
  232: #endif
  233:     if (ret < 0) {
  234:         fprintf(stderr, "Error %d while initializing QEMU acceleration layer - disabling it for now\n", ret);
  235:     fail:
  236:         kqemu_closefd(kqemu_fd);
  237:         kqemu_fd = KQEMU_INVALID_FD;
  238:         return -1;
  239:     }
  240:     kqemu_update_cpuid(env);
  241:     env->kqemu_enabled = kqemu_allowed;
  242:     nb_pages_to_flush = 0;
  243:     nb_ram_pages_to_update = 0;
  244:     return 0;
  245: }
  246: 
  247: void kqemu_flush_page(CPUState *env, target_ulong addr)
  248: {
  249: #if defined(DEBUG)
  250:     if (loglevel & CPU_LOG_INT) {
  251:         fprintf(logfile, "kqemu_flush_page: addr=" TARGET_FMT_lx "\n", addr);
  252:     }
  253: #endif
  254:     if (nb_pages_to_flush >= KQEMU_MAX_PAGES_TO_FLUSH)
  255:         nb_pages_to_flush = KQEMU_FLUSH_ALL;
  256:     else
  257:         pages_to_flush[nb_pages_to_flush++] = addr;
  258: }
  259: 
  260: void kqemu_flush(CPUState *env, int global)
  261: {
  262: #ifdef DEBUG
  263:     if (loglevel & CPU_LOG_INT) {
  264:         fprintf(logfile, "kqemu_flush:\n");
  265:     }
  266: #endif
  267:     nb_pages_to_flush = KQEMU_FLUSH_ALL;
  268: }
  269: 
  270: void kqemu_set_notdirty(CPUState *env, ram_addr_t ram_addr)
  271: {
  272: #ifdef DEBUG
  273:     if (loglevel & CPU_LOG_INT) {
  274:         fprintf(logfile, "kqemu_set_notdirty: addr=%08lx\n", ram_addr);
  275:     }
  276: #endif
  277:     /* we only track transitions to dirty state */
  278:     if (phys_ram_dirty[ram_addr >> TARGET_PAGE_BITS] != 0xff)
  279:         return;
  280:     if (nb_ram_pages_to_update >= KQEMU_MAX_RAM_PAGES_TO_UPDATE)
  281:         nb_ram_pages_to_update = KQEMU_RAM_PAGES_UPDATE_ALL;
  282:     else
  283:         ram_pages_to_update[nb_ram_pages_to_update++] = ram_addr;
  284: }
  285: 
  286: static void kqemu_reset_modified_ram_pages(void)
  287: {
  288:     int i;
  289:     unsigned long page_index;
  290: 
  291:     for(i = 0; i < nb_modified_ram_pages; i++) {
  292:         page_index = modified_ram_pages[i] >> TARGET_PAGE_BITS;
  293:         modified_ram_pages_table[page_index] = 0;
  294:     }
  295:     nb_modified_ram_pages = 0;
  296: }
  297: 
  298: void kqemu_modify_page(CPUState *env, ram_addr_t ram_addr)
  299: {
  300:     unsigned long page_index;
  301:     int ret;
  302: #ifdef _WIN32
  303:     DWORD temp;
  304: #endif
  305: 
  306:     page_index = ram_addr >> TARGET_PAGE_BITS;
  307:     if (!modified_ram_pages_table[page_index]) {
  308: #if 0
  309:         printf("%d: modify_page=%08lx\n", nb_modified_ram_pages, ram_addr);
  310: #endif
  311:         modified_ram_pages_table[page_index] = 1;
  312:         modified_ram_pages[nb_modified_ram_pages++] = ram_addr;
  313:         if (nb_modified_ram_pages >= KQEMU_MAX_MODIFIED_RAM_PAGES) {
  314:             /* flush */
  315: #ifdef _WIN32
  316:             ret = DeviceIoControl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
  317:                                   &nb_modified_ram_pages,
  318:                                   sizeof(nb_modified_ram_pages),
  319:                                   NULL, 0, &temp, NULL);
  320: #else
  321:             ret = ioctl(kqemu_fd, KQEMU_MODIFY_RAM_PAGES,
  322:                         &nb_modified_ram_pages);
  323: #endif
  324:             kqemu_reset_modified_ram_pages();
  325:         }
  326:     }
  327: }
  328: 
  329: struct fpstate {
  330:     uint16_t fpuc;
  331:     uint16_t dummy1;
  332:     uint16_t fpus;
  333:     uint16_t dummy2;
  334:     uint16_t fptag;
  335:     uint16_t dummy3;
  336: 
  337:     uint32_t fpip;
  338:     uint32_t fpcs;
  339:     uint32_t fpoo;
  340:     uint32_t fpos;
  341:     uint8_t fpregs1[8 * 10];
  342: };
  343: 
  344: struct fpxstate {
  345:     uint16_t fpuc;
  346:     uint16_t fpus;
  347:     uint16_t fptag;
  348:     uint16_t fop;
  349:     uint32_t fpuip;
  350:     uint16_t cs_sel;
  351:     uint16_t dummy0;
  352:     uint32_t fpudp;
  353:     uint16_t ds_sel;
  354:     uint16_t dummy1;
  355:     uint32_t mxcsr;
  356:     uint32_t mxcsr_mask;
  357:     uint8_t fpregs1[8 * 16];
  358:     uint8_t xmm_regs[16 * 16];
  359:     uint8_t dummy2[96];
  360: };
  361: 
  362: static struct fpxstate fpx1 __attribute__((aligned(16)));
  363: 
  364: static void restore_native_fp_frstor(CPUState *env)
  365: {
  366:     int fptag, i, j;
  367:     struct fpstate fp1, *fp = &fp1;
  368: 
  369:     fp->fpuc = env->fpuc;
  370:     fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
  371:     fptag = 0;
  372:     for (i=7; i>=0; i--) {
  373:         fptag <<= 2;
  374:         if (env->fptags[i]) {
  375:             fptag |= 3;
  376:         } else {
  377:             /* the FPU automatically computes it */
  378:         }
  379:     }
  380:     fp->fptag = fptag;
  381:     j = env->fpstt;
  382:     for(i = 0;i < 8; i++) {
  383:         memcpy(&fp->fpregs1[i * 10], &env->fpregs[j].d, 10);
  384:         j = (j + 1) & 7;
  385:     }
  386:     asm volatile ("frstor %0" : "=m" (*fp));
  387: }
  388: 
  389: static void save_native_fp_fsave(CPUState *env)
  390: {
  391:     int fptag, i, j;
  392:     uint16_t fpuc;
  393:     struct fpstate fp1, *fp = &fp1;
  394: 
  395:     asm volatile ("fsave %0" : : "m" (*fp));
  396:     env->fpuc = fp->fpuc;
  397:     env->fpstt = (fp->fpus >> 11) & 7;
  398:     env->fpus = fp->fpus & ~0x3800;
  399:     fptag = fp->fptag;
  400:     for(i = 0;i < 8; i++) {
  401:         env->fptags[i] = ((fptag & 3) == 3);
  402:         fptag >>= 2;
  403:     }
  404:     j = env->fpstt;
  405:     for(i = 0;i < 8; i++) {
  406:         memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 10], 10);
  407:         j = (j + 1) & 7;
  408:     }
  409:     /* we must restore the default rounding state */
  410:     fpuc = 0x037f | (env->fpuc & (3 << 10));
  411:     asm volatile("fldcw %0" : : "m" (fpuc));
  412: }
  413: 
  414: static void restore_native_fp_fxrstor(CPUState *env)
  415: {
  416:     struct fpxstate *fp = &fpx1;
  417:     int i, j, fptag;
  418: 
  419:     fp->fpuc = env->fpuc;
  420:     fp->fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
  421:     fptag = 0;
  422:     for(i = 0; i < 8; i++)
  423:         fptag |= (env->fptags[i] << i);
  424:     fp->fptag = fptag ^ 0xff;
  425: 
  426:     j = env->fpstt;
  427:     for(i = 0;i < 8; i++) {
  428:         memcpy(&fp->fpregs1[i * 16], &env->fpregs[j].d, 10);
  429:         j = (j + 1) & 7;
  430:     }
  431:     if (env->cpuid_features & CPUID_SSE) {
  432:         fp->mxcsr = env->mxcsr;
  433:         /* XXX: check if DAZ is not available */
  434:         fp->mxcsr_mask = 0xffff;
  435:         memcpy(fp->xmm_regs, env->xmm_regs, CPU_NB_REGS * 16);
  436:     }
  437:     asm volatile ("fxrstor %0" : "=m" (*fp));
  438: }
  439: 
  440: static void save_native_fp_fxsave(CPUState *env)
  441: {
  442:     struct fpxstate *fp = &fpx1;
  443:     int fptag, i, j;
  444:     uint16_t fpuc;
  445: 
  446:     asm volatile ("fxsave %0" : : "m" (*fp));
  447:     env->fpuc = fp->fpuc;
  448:     env->fpstt = (fp->fpus >> 11) & 7;
  449:     env->fpus = fp->fpus & ~0x3800;
  450:     fptag = fp->fptag ^ 0xff;
  451:     for(i = 0;i < 8; i++) {
  452:         env->fptags[i] = (fptag >> i) & 1;
  453:     }
  454:     j = env->fpstt;
  455:     for(i = 0;i < 8; i++) {
  456:         memcpy(&env->fpregs[j].d, &fp->fpregs1[i * 16], 10);
  457:         j = (j + 1) & 7;
  458:     }
  459:     if (env->cpuid_features & CPUID_SSE) {
  460:         env->mxcsr = fp->mxcsr;
  461:         memcpy(env->xmm_regs, fp->xmm_regs, CPU_NB_REGS * 16);
  462:     }
  463: 
  464:     /* we must restore the default rounding state */
  465:     asm volatile ("fninit");
  466:     fpuc = 0x037f | (env->fpuc & (3 << 10));
  467:     asm volatile("fldcw %0" : : "m" (fpuc));
  468: }
  469: 
  470: static int do_syscall(CPUState *env,
  471:                       struct kqemu_cpu_state *kenv)
  472: {
  473:     int selector;
  474: 
  475:     selector = (env->star >> 32) & 0xffff;
  476: #ifdef __x86_64__
  477:     if (env->hflags & HF_LMA_MASK) {
  478:         int code64;
  479: 
  480:         env->regs[R_ECX] = kenv->next_eip;
  481:         env->regs[11] = env->eflags;
  482: 
  483:         code64 = env->hflags & HF_CS64_MASK;
  484: 
  485:         cpu_x86_set_cpl(env, 0);
  486:         cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
  487:                                0, 0xffffffff,
  488:                                DESC_G_MASK | DESC_P_MASK |
  489:                                DESC_S_MASK |
  490:                                DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | DESC_L_MASK);
  491:         cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
  492:                                0, 0xffffffff,
  493:                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
  494:                                DESC_S_MASK |
  495:                                DESC_W_MASK | DESC_A_MASK);
  496:         env->eflags &= ~env->fmask;
  497:         if (code64)
  498:             env->eip = env->lstar;
  499:         else
  500:             env->eip = env->cstar;
  501:     } else
  502: #endif
  503:     {
  504:         env->regs[R_ECX] = (uint32_t)kenv->next_eip;
  505: 
  506:         cpu_x86_set_cpl(env, 0);
  507:         cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
  508:                            0, 0xffffffff,
  509:                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
  510:                                DESC_S_MASK |
  511:                                DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
  512:         cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc,
  513:                                0, 0xffffffff,
  514:                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
  515:                                DESC_S_MASK |
  516:                                DESC_W_MASK | DESC_A_MASK);
  517:         env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
  518:         env->eip = (uint32_t)env->star;
  519:     }
  520:     return 2;
  521: }
  522: 
  523: #ifdef CONFIG_PROFILER
  524: 
  525: #define PC_REC_SIZE 1
  526: #define PC_REC_HASH_BITS 16
  527: #define PC_REC_HASH_SIZE (1 << PC_REC_HASH_BITS)
  528: 
  529: typedef struct PCRecord {
  530:     unsigned long pc;
  531:     int64_t count;
  532:     struct PCRecord *next;
  533: } PCRecord;
  534: 
  535: static PCRecord *pc_rec_hash[PC_REC_HASH_SIZE];
  536: static int nb_pc_records;
  537: 
  538: static void kqemu_record_pc(unsigned long pc)
  539: {
  540:     unsigned long h;
  541:     PCRecord **pr, *r;
  542: 
  543:     h = pc / PC_REC_SIZE;
  544:     h = h ^ (h >> PC_REC_HASH_BITS);
  545:     h &= (PC_REC_HASH_SIZE - 1);
  546:     pr = &pc_rec_hash[h];
  547:     for(;;) {
  548:         r = *pr;
  549:         if (r == NULL)
  550:             break;
  551:         if (r->pc == pc) {
  552:             r->count++;
  553:             return;
  554:         }
  555:         pr = &r->next;
  556:     }
  557:     r = malloc(sizeof(PCRecord));
  558:     r->count = 1;
  559:     r->pc = pc;
  560:     r->next = NULL;
  561:     *pr = r;
  562:     nb_pc_records++;
  563: }
  564: 
  565: static int pc_rec_cmp(const void *p1, const void *p2)
  566: {
  567:     PCRecord *r1 = *(PCRecord **)p1;
  568:     PCRecord *r2 = *(PCRecord **)p2;
  569:     if (r1->count < r2->count)