From 19a3c7e03fa78dc102bf8372d78662dceeef0409 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Fri, 16 Feb 2018 10:25:01 -0500
Subject: [PATCH] tcg: Emit prologue to the beginning of code_gen_buffer

By putting the prologue at the end, we risk overwriting the
prologue should our estimate of maximum TB size prove too small.
Given the two different placements of the call to tcg_prologue_init,
move the high water mark computation into tcg_prologue_init.

Backports commit 8163b74938d8b7d12e70597c4553dd0dc49443d5 from qemu
---
 qemu/tcg/tcg.c       | 34 +++++++++++++++++++++++----
 qemu/translate-all.c | 55 ++++++++++++++++++++++++++------------------
 2 files changed, 63 insertions(+), 26 deletions(-)

diff --git a/qemu/tcg/tcg.c b/qemu/tcg/tcg.c
index 03b86357..ef8633d5 100644
--- a/qemu/tcg/tcg.c
+++ b/qemu/tcg/tcg.c
@@ -356,16 +356,42 @@ void tcg_context_init(TCGContext *s)
 
 void tcg_prologue_init(TCGContext *s)
 {
-    /* init global prologue and epilogue */
-    s->code_buf = s->code_gen_prologue;
-    s->code_ptr = s->code_buf;
+    size_t prologue_size, total_size;
+    void *buf0, *buf1;
+
+    /* Put the prologue at the beginning of code_gen_buffer.  */
+    buf0 = s->code_gen_buffer;
+    s->code_ptr = buf0;
+    s->code_buf = buf0;
+    s->code_gen_prologue = buf0;
+
+    /* Generate the prologue.  */
     tcg_target_qemu_prologue(s);
-    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+    buf1 = s->code_ptr;
+    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
+
+    /* Deduct the prologue from the buffer.  */
+    prologue_size = tcg_current_code_size(s);
+    s->code_gen_ptr = buf1;
+    s->code_gen_buffer = buf1;
+    s->code_buf = buf1;
+    total_size = s->code_gen_buffer_size - prologue_size;
+    s->code_gen_buffer_size = total_size;
+
+    /* Compute a high-water mark, at which we voluntarily flush the
+       buffer and start over.  */
+    s->code_gen_buffer_max_size = total_size - TCG_MAX_OP_SIZE * OPC_BUF_SIZE;
+
+    // Unicorn: commented out
+    // tcg_register_jit(s->code_gen_buffer, total_size);
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
         size_t size = tcg_current_code_size(s);
         qemu_log("PROLOGUE: [size=%zu]\n", size);
+        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
+        // Unicorn: commented out
+        //log_disas(buf0, prologue_size);
         qemu_log("\n");
         qemu_log_flush();
     }
diff --git a/qemu/translate-all.c b/qemu/translate-all.c
index c6675773..605a10df 100644
--- a/qemu/translate-all.c
+++ b/qemu/translate-all.c
@@ -597,8 +597,18 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
 void free_code_gen_buffer(struct uc_struct *uc)
 {
     TCGContext *tcg_ctx = uc->tcg_ctx;
-    if (tcg_ctx->code_gen_buffer)
-        munmap(tcg_ctx->code_gen_buffer, tcg_ctx->code_gen_buffer_size);
+
+    // Unicorn: Free via code_gen_prologue rather than code_gen_buffer. The
+    //          prologue is emitted at the very start of the allocated block,
+    //          so code_gen_prologue holds the block's base address.
+    //          tcg_prologue_init then advances code_gen_buffer past the
+    //          prologue, so code_gen_buffer no longer points at the start of
+    //          the allocation and must not be handed to the deallocator.
+    //
+    //          See tcg_prologue_init in tcg.c for more info.
+    //
+    if (tcg_ctx->code_gen_prologue)
+        munmap(tcg_ctx->code_gen_prologue, tcg_ctx->code_gen_buffer_size);
 }
 
 static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
@@ -676,8 +686,18 @@ static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
 void free_code_gen_buffer(struct uc_struct *uc)
 {
     TCGContext *tcg_ctx = uc->tcg_ctx;
-    if (tcg_ctx->code_gen_buffer)
-        g_free(tcg_ctx->code_gen_buffer);
+
+    // Unicorn: Free via code_gen_prologue rather than code_gen_buffer. The
+    //          prologue is emitted at the very start of the allocated block,
+    //          so code_gen_prologue holds the block's base address.
+    //          tcg_prologue_init then advances code_gen_buffer past the
+    //          prologue, so code_gen_buffer no longer points at the start of
+    //          the allocation and must not be handed to the deallocator.
+    //
+    //          See tcg_prologue_init in tcg.c for more info.
+    //
+    if (tcg_ctx->code_gen_prologue)
+        g_free(tcg_ctx->code_gen_prologue);
 }
 
 static inline void *alloc_code_gen_buffer(struct uc_struct *uc)
@@ -721,24 +741,16 @@ static inline void code_gen_alloc(struct uc_struct *uc, size_t tb_size)
         exit(1);
     }
 
-    //qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size,
-    //        QEMU_MADV_HUGEPAGE);
+    // Unicorn: Commented out
+    //qemu_madvise(tcg_ctx->code_gen_buffer, tcg_ctx->code_gen_buffer_size,
+    //             QEMU_MADV_HUGEPAGE);
 
-    /* Steal room for the prologue at the end of the buffer.  This ensures
-       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
-       from TB's to the prologue are going to be in range.  It also means
-       that we don't need to mark (additional) portions of the data segment
-       as executable.  */
-    tcg_ctx->code_gen_prologue = (char*)tcg_ctx->code_gen_buffer +
-            tcg_ctx->code_gen_buffer_size - 1024;
-    tcg_ctx->code_gen_buffer_size -= 1024;
-
-    tcg_ctx->code_gen_buffer_max_size = tcg_ctx->code_gen_buffer_size -
-        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
-    tcg_ctx->code_gen_max_blocks = tcg_ctx->code_gen_buffer_size /
-            CODE_GEN_AVG_BLOCK_SIZE;
-    tcg_ctx->tb_ctx.tbs =
-            g_malloc(tcg_ctx->code_gen_max_blocks * sizeof(TranslationBlock));
+    /* Estimate a good size for the number of TBs we can support.  We
+       still haven't deducted the prologue from the buffer size here,
+       but that's minimal and won't affect the estimate much.  */
+    tcg_ctx->code_gen_max_blocks
+        = tcg_ctx->code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
+    tcg_ctx->tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx->code_gen_max_blocks);
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -751,7 +763,6 @@ void tcg_exec_init(struct uc_struct *uc, unsigned long tb_size)
     cpu_gen_init(uc);
     code_gen_alloc(uc, tb_size);
     tcg_ctx = uc->tcg_ctx;
-    tcg_ctx->code_gen_ptr = tcg_ctx->code_gen_buffer;
     tcg_ctx->uc = uc;
     page_init();
 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)