From 19a3c7e03fa78dc102bf8372d78662dceeef0409 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 16 Feb 2018 10:25:01 -0500 Subject: [PATCH] tcg: Emit prologue to the beginning of code_gen_buffer By putting the prologue at the end, we risk overwriting the prologue should our estimate of maximum TB size prove too small. Given the two different placements of the call to tcg_prologue_init, move the high water mark computation into tcg_prologue_init. Backports commit 8163b74938d8b7d12e70597c4553dd0dc49443d5 from qemu --- qemu/tcg/tcg.c | 34 +++++++++++++++++++++++---- qemu/translate-all.c | 55 ++++++++++++++++++++++++++------------------ 2 files changed, 63 insertions(+), 26 deletions(-) diff --git a/qemu/tcg/tcg.c b/qemu/tcg/tcg.c index 03b86357..ef8633d5 100644 --- a/qemu/tcg/tcg.c +++ b/qemu/tcg/tcg.c @@ -356,16 +356,42 @@ void tcg_context_init(TCGContext *s) void tcg_prologue_init(TCGContext *s) { - /* init global prologue and epilogue */ - s->code_buf = s->code_gen_prologue; - s->code_ptr = s->code_buf; + size_t prologue_size, total_size; + void *buf0, *buf1; + + /* Put the prologue at the beginning of code_gen_buffer. */ + buf0 = s->code_gen_buffer; + s->code_ptr = buf0; + s->code_buf = buf0; + s->code_gen_prologue = buf0; + + /* Generate the prologue. */ tcg_target_qemu_prologue(s); - flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); + buf1 = s->code_ptr; + flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1); + + /* Deduct the prologue from the buffer. */ + prologue_size = tcg_current_code_size(s); + s->code_gen_ptr = buf1; + s->code_gen_buffer = buf1; + s->code_buf = buf1; + total_size = s->code_gen_buffer_size - prologue_size; + s->code_gen_buffer_size = total_size; + + /* Compute a high-water mark, at which we voluntarily flush the + buffer and start over.
*/ + s->code_gen_buffer_max_size = total_size - TCG_MAX_OP_SIZE * OPC_BUF_SIZE; + + // Unicorn: commented out + // tcg_register_jit(s->code_gen_buffer, total_size); #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { size_t size = tcg_current_code_size(s); qemu_log("PROLOGUE: [size=%zu]\n", size); + qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); + // Unicorn: commented out + //log_disas(buf0, prologue_size); qemu_log("\n"); qemu_log_flush(); } diff --git a/qemu/translate-all.c b/qemu/translate-all.c index c6675773..605a10df 100644 --- a/qemu/translate-all.c +++ b/qemu/translate-all.c @@ -597,8 +597,18 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc) void free_code_gen_buffer(struct uc_struct *uc) { TCGContext *tcg_ctx = uc->tcg_ctx; - if (tcg_ctx->code_gen_buffer) - munmap(tcg_ctx->code_gen_buffer, tcg_ctx->code_gen_buffer_size); + + // Unicorn: Free the prologue rather than the buffer directly, as the prologue + // has the starting address of the same memory block that the code + // buffer is within. As the prologue is generated at the beginning of + // the memory block, the code buffer itself has the size of the prologue + // decremented from it. If the buffer was freed, then the address would + // be off by whatever size the prologue data is. + // + // See tcg_prologue_init in tcg.c for more info. + // + if (tcg_ctx->code_gen_prologue) + munmap(tcg_ctx->code_gen_prologue, tcg_ctx->code_gen_buffer_size); } static inline void *alloc_code_gen_buffer(struct uc_struct *uc) @@ -676,8 +686,18 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc) void free_code_gen_buffer(struct uc_struct *uc) { TCGContext *tcg_ctx = uc->tcg_ctx; - if (tcg_ctx->code_gen_buffer) - g_free(tcg_ctx->code_gen_buffer); + + // Unicorn: Free the prologue rather than the buffer directly, as the prologue + // has the starting address of the same memory block that the code + // buffer is within. 
As the prologue is generated at the beginning of + // the memory block, the code buffer itself has the size of the prologue + // decremented from it. If the buffer was freed, then the address would + // be off by whatever size the prologue data is. + // + // See tcg_prologue_init in tcg.c for more info. + // + if (tcg_ctx->code_gen_prologue) + g_free(tcg_ctx->code_gen_prologue); } static inline void *alloc_code_gen_buffer(struct uc_struct *uc) @@ -721,24 +741,16 @@ static inline void code_gen_alloc(struct uc_struct *uc, size_t tb_size) exit(1); } - //qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size, - // QEMU_MADV_HUGEPAGE); + // Unicorn: Commented out + //qemu_madvise(tcg_ctx->code_gen_buffer, tcg_ctx->code_gen_buffer_size, + // QEMU_MADV_HUGEPAGE); - /* Steal room for the prologue at the end of the buffer. This ensures - (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches - from TB's to the prologue are going to be in range. It also means - that we don't need to mark (additional) portions of the data segment - as executable. */ - tcg_ctx->code_gen_prologue = (char*)tcg_ctx->code_gen_buffer + - tcg_ctx->code_gen_buffer_size - 1024; - tcg_ctx->code_gen_buffer_size -= 1024; - - tcg_ctx->code_gen_buffer_max_size = tcg_ctx->code_gen_buffer_size - - (TCG_MAX_OP_SIZE * OPC_BUF_SIZE); - tcg_ctx->code_gen_max_blocks = tcg_ctx->code_gen_buffer_size / - CODE_GEN_AVG_BLOCK_SIZE; - tcg_ctx->tb_ctx.tbs = - g_malloc(tcg_ctx->code_gen_max_blocks * sizeof(TranslationBlock)); + /* Estimate a good size for the number of TBs we can support. We + still haven't deducted the prologue from the buffer size here, + but that's minimal and won't affect the estimate much. */ + tcg_ctx->code_gen_max_blocks + = tcg_ctx->code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE; + tcg_ctx->tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx->code_gen_max_blocks); } /* Must be called before using the QEMU cpus. 
'tb_size' is the size @@ -751,7 +763,6 @@ void tcg_exec_init(struct uc_struct *uc, unsigned long tb_size) cpu_gen_init(uc); code_gen_alloc(uc, tb_size); tcg_ctx = uc->tcg_ctx; - tcg_ctx->code_gen_ptr = tcg_ctx->code_gen_buffer; tcg_ctx->uc = uc; page_init(); #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)