From d49bd55f52365c115ff51e3f72b94453905c50af Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Mon, 26 Feb 2018 03:04:08 -0500 Subject: [PATCH] tcg/i386: Add support for fence Generate a 'lock orl $0,0(%esp)' instruction for ordering instead of mfence which has similar ordering semantics. Backports commit a7d00d4effb58889ac6df64f98ac50c9d1594149 from qemu --- qemu/tcg/i386/tcg-target.inc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/qemu/tcg/i386/tcg-target.inc.c b/qemu/tcg/i386/tcg-target.inc.c index e71fd9f0..4f7e29f6 100644 --- a/qemu/tcg/i386/tcg-target.inc.c +++ b/qemu/tcg/i386/tcg-target.inc.c @@ -712,6 +712,18 @@ static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val) } } +static inline void tcg_out_mb(TCGContext *s, TCGArg a0) +{ + /* Given the strength of x86 memory ordering, we only need care for + store-load ordering. Experimentally, "lock orl $0,0(%esp)" is + faster than "mfence", so don't bother with the sse insn. */ + if (a0 & TCG_MO_ST_LD) { + tcg_out8(s, 0xf0); + tcg_out_modrm_offset(s, OPC_ARITH_EvIb, ARITH_OR, TCG_REG_ESP, 0); + tcg_out8(s, 0); + } +} + static inline void tcg_out_push(TCGContext *s, int reg) { tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0); @@ -2232,6 +2244,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + case INDEX_op_mb: + tcg_out_mb(s, args[0]); + break; case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */ @@ -2297,6 +2312,8 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } }, { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } }, + { INDEX_op_mb, { } }, + #if TCG_TARGET_REG_BITS == 32 { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } }, { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },