From 8a99e1bad25283b10c22a14e171ad119d2b8b4fd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 3 Jul 2018 02:01:15 -0400 Subject: [PATCH] target/arm: Implement SVE load and broadcast quadword Backports commit 05abe304be2987cb3576729a14dab96e9ccfaec9 from qemu --- qemu/target/arm/sve.decode | 9 ++++++ qemu/target/arm/translate-sve.c | 55 +++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/qemu/target/arm/sve.decode b/qemu/target/arm/sve.decode index 7a993804..c7f4731f 100644 --- a/qemu/target/arm/sve.decode +++ b/qemu/target/arm/sve.decode @@ -715,6 +715,15 @@ LD_zprr 1010010 .. nreg:2 ..... 110 ... ..... ..... @rprr_load_msz # LD2B, LD2H, LD2W, LD2D; etc. LD_zpri 1010010 .. nreg:2 0.... 111 ... ..... ..... @rpri_load_msz +# SVE load and broadcast quadword (scalar plus scalar) +LD1RQ_zprr 1010010 .. 00 ..... 000 ... ..... ..... \ + @rprr_load_msz nreg=0 + +# SVE load and broadcast quadword (scalar plus immediate) +# LD1RQB, LD1RQH, LD1RQS, LD1RQD +LD1RQ_zpri 1010010 .. 00 0.... 001 ... ..... ..... \ + @rpri_load_msz nreg=0 + ### SVE Memory Store Group # SVE contiguous store (scalar plus immediate) diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c index 898c6a46..21372f2e 100644 --- a/qemu/target/arm/translate-sve.c +++ b/qemu/target/arm/translate-sve.c @@ -3865,6 +3865,61 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn) return true; } +static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz) +{ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + static gen_helper_gvec_mem * const fns[4] = { + gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r, + gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r, + }; + unsigned vsz = vec_full_reg_size(s); + TCGv_ptr t_pg; + TCGv_i32 desc; + + /* Load the first quadword using the normal predicated load helpers. */ + desc = tcg_const_i32(tcg_ctx, simd_desc(16, 16, zt)); + t_pg = tcg_temp_new_ptr(tcg_ctx); + + tcg_gen_addi_ptr(tcg_ctx, t_pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, pg)); + fns[msz](tcg_ctx, tcg_ctx->cpu_env, t_pg, addr, desc); + + tcg_temp_free_ptr(tcg_ctx, t_pg); + tcg_temp_free_i32(tcg_ctx, desc); + + /* Replicate that first quadword. */ + if (vsz > 16) { + unsigned dofs = vec_full_reg_offset(s, zt); + tcg_gen_gvec_dup_mem(tcg_ctx, 4, dofs + 16, dofs, vsz - 16, vsz - 16); + } +} + +static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn) +{ + if (a->rm == 31) { + return false; + } + if (sve_access_check(s)) { + TCGContext *tcg_ctx = s->uc->tcg_ctx; + int msz = dtype_msz(a->dtype); + TCGv_i64 addr = new_tmp_a64(s); + tcg_gen_shli_i64(tcg_ctx, addr, cpu_reg(s, a->rm), msz); + tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, a->rn)); + do_ldrq(s, a->rd, a->pg, addr, msz); + } + return true; +} + +static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn) +{ + if (sve_access_check(s)) { + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_i64 addr = new_tmp_a64(s); + tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, a->rn), a->imm * 16); + do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype)); + } + return true; +} + static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz, int esz, int nreg) {