goalc/arm: another couple dozen instructions

2026-06-08 04:17:28 -04:00 · 2026-04-10 00:00:03 -04:00
parent 2126e47381
commit 205b71dae0
4 changed files with 195 additions and 105 deletions
@@ -718,87 +718,87 @@ Instruction sar_gpr64_u8(const ObjectGenerator& gen, Register reg, uint8_t sa) {
 }

 Instruction jmp_32(const ObjectGenerator& gen) {
-  IGEN_DISPATCH(jmp_32);
+  IGEN_DISPATCH(jmp_imm);
 }

 Instruction je_32(const ObjectGenerator& gen) {
-  IGEN_DISPATCH(je_32);
+  IGEN_DISPATCH(je_imm);
 }

 Instruction jne_32(const ObjectGenerator& gen) {
-  IGEN_DISPATCH(jne_32);
+  IGEN_DISPATCH(jne_imm);
 }

 Instruction jle_32(const ObjectGenerator& gen) {
-  IGEN_DISPATCH(jle_32);
+  IGEN_DISPATCH(jle_imm);
 }

 Instruction jge_32(const ObjectGenerator& gen) {
-  IGEN_DISPATCH(jge_32);
+  IGEN_DISPATCH(jge_imm);
 }

 Instruction jl_32(const ObjectGenerator& gen) {
-  IGEN_DISPATCH(jl_32);
+  IGEN_DISPATCH(jl_imm);
 }

 Instruction jg_32(const ObjectGenerator& gen) {
-  IGEN_DISPATCH(jg_32);
+  IGEN_DISPATCH(jg_imm);
 }

 Instruction jbe_32(const ObjectGenerator& gen) {
-  IGEN_DISPATCH(jbe_32);
+  IGEN_DISPATCH(jbe_imm);
 }

 Instruction jae_32(const ObjectGenerator& gen) {
-  IGEN_DISPATCH(jae_32);
+  IGEN_DISPATCH(jae_imm);
 }

 Instruction jb_32(const ObjectGenerator& gen) {
-  IGEN_DISPATCH(jb_32);
+  IGEN_DISPATCH(jb_imm);
 }

 Instruction ja_32(const ObjectGenerator& gen) {
-  IGEN_DISPATCH(ja_32);
+  IGEN_DISPATCH(ja_imm);
 }

 Instruction cmp_flt_flt(const ObjectGenerator& gen, Register a, Register b) {
-  IGEN_DISPATCH(cmp_flt_flt, a, b);
+  IGEN_DISPATCH(cmp_f32_f32, a, b);
 }

 Instruction sqrts_xmm(const ObjectGenerator& gen, Register dst, Register src) {
-  IGEN_DISPATCH(sqrts_xmm, dst, src);
+  IGEN_DISPATCH(sqrt_f32, dst, src);
 }

 Instruction mulss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
-  IGEN_DISPATCH(mulss_xmm_xmm, dst, src);
+  IGEN_DISPATCH(mul_f32_f32, dst, src);
 }

 Instruction divss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
-  IGEN_DISPATCH(divss_xmm_xmm, dst, src);
+  IGEN_DISPATCH(div_f32_f32, dst, src);
 }

 Instruction subss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
-  IGEN_DISPATCH(subss_xmm_xmm, dst, src);
+  IGEN_DISPATCH(sub_f32_f32, dst, src);
 }

 Instruction addss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
-  IGEN_DISPATCH(addss_xmm_xmm, dst, src);
+  IGEN_DISPATCH(add_f32_f32, dst, src);
 }

 Instruction minss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
-  IGEN_DISPATCH(minss_xmm_xmm, dst, src);
+  IGEN_DISPATCH(min_f32_f32, dst, src);
 }

 Instruction maxss_xmm_xmm(const ObjectGenerator& gen, Register dst, Register src) {
-  IGEN_DISPATCH(maxss_xmm_xmm, dst, src);
+  IGEN_DISPATCH(max_f32_f32, dst, src);
 }

 Instruction int32_to_float(const ObjectGenerator& gen, Register dst, Register src) {
-  IGEN_DISPATCH(int32_to_float, dst, src);
+  IGEN_DISPATCH(int32_to_f32, dst, src);
 }

 Instruction float_to_int32(const ObjectGenerator& gen, Register dst, Register src) {
-  IGEN_DISPATCH(float_to_int32, dst, src);
+  IGEN_DISPATCH(f32_to_int32, dst, src);
 }

 Instruction nop(const ObjectGenerator& gen) {
@@ -849,114 +849,189 @@ InstructionARM64 sar_gpr64_u8(Register reg, uint8_t sa) {
 //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 //   CONTROL FLOW
 //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+//
+// All of these instructions are emitted with a placeholder target of zero,
+// and then it's up to the IR to patch in the actual target
+//
+// Critically, on arm these relative targets must be within
+// 128MB, which is much less than x86 (~2GB)
+//
+// However, these functions are only really used for jumps within
+// a given function...of which the largest we've seen isn't even
+// a megabyte in size
+//
+// So for now, keep it simple and don't implement something more
+// complicated like veneers, this should be fine.

-InstructionARM64 jmp_32() {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 jmp_imm() {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/b_uncond.html
+  // B <label>
+  return InstructionARM64(Base(0b000101, 6), Imm26(0));
 }

-InstructionARM64 je_32() {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+// Now these instructions in ARM are even more limiting, conditional
+// branches must be within 1MB relative to the instruction
+//
+// However once again, our biggest functions are only a few kb, which is WAY below that limit
+//
+// But still...be aware!  There should be some assertions in place in the patching so that
+// said issues don't just fly under the radar
+//
+// Also, these instructions may have to be patched slightly differently since
+// ARM uses a single branch instruction for all
+//
+// It's worth noting that these x86 instructions also have limitations, they cannot
+// jump to far labels (labels in other code segments).  But that's more difficult to
+// give a numeric value to like with ARM.
+
+InstructionARM64 je_imm() {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/b_cond.html
+  // B.<cond> <label>
+  // 0000 	EQ
+  return InstructionARM64(Base(0b01010100, 8), Imm19(0), Cond(0b0000));
 }

-InstructionARM64 jne_32() {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 jne_imm() {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/b_cond.html
+  // B.<cond> <label>
+  // 0001 	NE
+  return InstructionARM64(Base(0b01010100, 8), Imm19(0), Cond(0b0001));
 }

-InstructionARM64 jle_32() {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 jle_imm() {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/b_cond.html
+  // B.<cond> <label>
+  // 1101 	LE
+  return InstructionARM64(Base(0b01010100, 8), Imm19(0), Cond(0b1101));
 }

-InstructionARM64 jge_32() {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 jge_imm() {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/b_cond.html
+  // B.<cond> <label>
+  // 1010 	GE
+  return InstructionARM64(Base(0b01010100, 8), Imm19(0), Cond(0b1010));
 }

-InstructionARM64 jl_32() {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 jl_imm() {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/b_cond.html
+  // B.<cond> <label>
+  // 1011 	LT
+  return InstructionARM64(Base(0b01010100, 8), Imm19(0), Cond(0b1011));
 }

-InstructionARM64 jg_32() {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 jg_imm() {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/b_cond.html
+  // B.<cond> <label>
+  // 1100 	GT
+  return InstructionARM64(Base(0b01010100, 8), Imm19(0), Cond(0b1100));
 }

-InstructionARM64 jbe_32() {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 jbe_imm() {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/b_cond.html
+  // B.<cond> <label>
+  // 1001 	LS
+  return InstructionARM64(Base(0b01010100, 8), Imm19(0), Cond(0b1001));
 }

-InstructionARM64 jae_32() {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 jae_imm() {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/b_cond.html
+  // B.<cond> <label>
+  // 0010 	CS
+  return InstructionARM64(Base(0b01010100, 8), Imm19(0), Cond(0b0010));
 }

-InstructionARM64 jb_32() {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 jb_imm() {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/b_cond.html
+  // B.<cond> <label>
+  // 0011 	CC
+  return InstructionARM64(Base(0b01010100, 8), Imm19(0), Cond(0b0011));
 }

-InstructionARM64 ja_32() {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 ja_imm() {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/b_cond.html
+  // B.<cond> <label>
+  // 1000 	HI
+  return InstructionARM64(Base(0b01010100, 8), Imm19(0), Cond(0b1000));
 }

 //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 //   FLOAT MATH
 //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-InstructionARM64 cmp_flt_flt(Register a, Register b) {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 cmp_f32_f32(Register a, Register b) {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/fcmp_float.html
+  // Single-precision (ftype == 00 && opc == 00)
+  // FCMP <Sn>, <Sm>
+  return InstructionARM64(Base(0b00011110001000000010000000000000, 32), Rn(a.id()), Rm(b.id()));
 }

-InstructionARM64 sqrts_xmm(Register dst, Register src) {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 sqrt_f32(Register dst, Register src) {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/fsqrt_float.html
+  // Single-precision (ftype == 00)
+  // FSQRT <Sd>, <Sn> -- two-operand form: dst goes in Rd, src in Rn (the encoding has no Rm)
+  return InstructionARM64(Base(0b0001111000100001110000, 22), Rd(dst.id()), Rn(src.id()));
 }

-InstructionARM64 mulss_xmm_xmm(Register dst, Register src) {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 mul_f32_f32(Register dst, Register src) {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/fmul_float.html
+  // Single-precision (ftype == 00)
+  // FMUL <Sd>, <Sn>, <Sm>
+  return InstructionARM64(Base(0b0001111000100000000010, 22), Rd(dst.id()), Rn(dst.id()),
+                          Rm(src.id()));
 }

-InstructionARM64 divss_xmm_xmm(Register dst, Register src) {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 div_f32_f32(Register dst, Register src) {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/fdiv_float.html
+  // Single-precision (ftype == 00)
+  // FDIV <Sd>, <Sn>, <Sm>
+  return InstructionARM64(Base(0b0001111000100000000110, 22), Rd(dst.id()), Rn(dst.id()),
+                          Rm(src.id()));
 }

-InstructionARM64 subss_xmm_xmm(Register dst, Register src) {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 sub_f32_f32(Register dst, Register src) {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/fsub_float.html
+  // Single-precision (ftype == 00)
+  // FSUB <Sd>, <Sn>, <Sm>
+  return InstructionARM64(Base(0b0001111000100000001110, 22), Rd(dst.id()), Rn(dst.id()),
+                          Rm(src.id()));
 }

-InstructionARM64 addss_xmm_xmm(Register dst, Register src) {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 add_f32_f32(Register dst, Register src) {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/fadd_float.html
+  // Single-precision (ftype == 00)
+  // FADD <Sd>, <Sn>, <Sm>
+  return InstructionARM64(Base(0b0001111000100000001010, 22), Rd(dst.id()), Rn(dst.id()),
+                          Rm(src.id()));
 }

-InstructionARM64 minss_xmm_xmm(Register dst, Register src) {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 min_f32_f32(Register dst, Register src) {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/fmin_float.html
+  // Single-precision (ftype == 00)
+  // FMIN <Sd>, <Sn>, <Sm>
+  return InstructionARM64(Base(0b0001111000100000010110, 22), Rd(dst.id()), Rn(dst.id()),
+                          Rm(src.id()));
 }

-InstructionARM64 maxss_xmm_xmm(Register dst, Register src) {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 max_f32_f32(Register dst, Register src) {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/fmax_float.html
+  // Single-precision (ftype == 00)
+  // FMAX <Sd>, <Sn>, <Sm>
+  return InstructionARM64(Base(0b0001111000100000010010, 22), Rd(dst.id()), Rn(dst.id()),
+                          Rm(src.id()));
 }

-InstructionARM64 int32_to_float(Register dst, Register src) {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 int32_to_f32(Register dst, Register src) {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/scvtf_float_int.html
+  // 32-bit to single-precision (sf == 0 && ftype == 00)
+  // SCVTF <Sd>, <Wn> -- dst is the float result (Rd), src is the integer being converted (Rn)
+  return InstructionARM64(Base(0b0001111000100010000000, 22), Rd(dst.id()), Rn(src.id()));
 }

-InstructionARM64 float_to_int32(Register dst, Register src) {
-  ASSERT_MSG(false, "not yet implemented");
-  return InstructionARM64(0b0);
+InstructionARM64 f32_to_int32(Register dst, Register src) {
+  // https://www.scs.stanford.edu/~zyedidia/arm64/fcvtzs_float_int.html
+  // Single-precision to 32-bit (sf == 0 && ftype == 00)
+  // FCVTZS <Wd>, <Sn> -- dst is the integer result (Rd), src is the float being converted (Rn)
+  return InstructionARM64(Base(0b0001111000111000000000, 22), Rd(dst.id()), Rn(src.id()));
 }

 InstructionARM64 nop() {
@@ -515,57 +515,57 @@ InstructionARM64 sar_gpr64_u8(Register reg, uint8_t sa);
 /*!
 * Jump, 32-bit constant offset.  The offset is by default 0 and must be patched later.
 */
-InstructionARM64 jmp_32();
+InstructionARM64 jmp_imm();

 /*!
 * Jump if equal.
 */
-InstructionARM64 je_32();
+InstructionARM64 je_imm();

 /*!
 * Jump not equal.
 */
-InstructionARM64 jne_32();
+InstructionARM64 jne_imm();

 /*!
 * Jump less than or equal.
 */
-InstructionARM64 jle_32();
+InstructionARM64 jle_imm();

 /*!
 * Jump greater than or equal.
 */
-InstructionARM64 jge_32();
+InstructionARM64 jge_imm();

 /*!
 * Jump less than
 */
-InstructionARM64 jl_32();
+InstructionARM64 jl_imm();

 /*!
 * Jump greater than
 */
-InstructionARM64 jg_32();
+InstructionARM64 jg_imm();

 /*!
 * Jump below or equal
 */
-InstructionARM64 jbe_32();
+InstructionARM64 jbe_imm();

 /*!
 * Jump above or equal
 */
-InstructionARM64 jae_32();
+InstructionARM64 jae_imm();

 /*!
 * Jump below
 */
-InstructionARM64 jb_32();
+InstructionARM64 jb_imm();

 /*!
 * Jump above
 */
-InstructionARM64 ja_32();
+InstructionARM64 ja_imm();

 //;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 //   FLOAT MATH
@@ -574,49 +574,49 @@ InstructionARM64 ja_32();
 /*!
 * Compare two floats and set flag register for jump (ucomiss)
 */
-InstructionARM64 cmp_flt_flt(Register a, Register b);
+InstructionARM64 cmp_f32_f32(Register a, Register b);

-InstructionARM64 sqrts_xmm(Register dst, Register src);
+InstructionARM64 sqrt_f32(Register dst, Register src);

 /*!
 * Multiply two floats in xmm's
 */
-InstructionARM64 mulss_xmm_xmm(Register dst, Register src);
+InstructionARM64 mul_f32_f32(Register dst, Register src);

 /*!
 * Divide two floats in xmm's
 */
-InstructionARM64 divss_xmm_xmm(Register dst, Register src);
+InstructionARM64 div_f32_f32(Register dst, Register src);

 /*!
 * Subtract two floats in xmm's
 */
-InstructionARM64 subss_xmm_xmm(Register dst, Register src);
+InstructionARM64 sub_f32_f32(Register dst, Register src);

 /*!
 * Add two floats in xmm's
 */
-InstructionARM64 addss_xmm_xmm(Register dst, Register src);
+InstructionARM64 add_f32_f32(Register dst, Register src);

 /*!
 * Floating point minimum.
 */
-InstructionARM64 minss_xmm_xmm(Register dst, Register src);
+InstructionARM64 min_f32_f32(Register dst, Register src);

 /*!
 * Floating point maximum.
 */
-InstructionARM64 maxss_xmm_xmm(Register dst, Register src);
+InstructionARM64 max_f32_f32(Register dst, Register src);

 /*!
 * Convert GPR int32 to XMM float (single precision)
 */
-InstructionARM64 int32_to_float(Register dst, Register src);
+InstructionARM64 int32_to_f32(Register dst, Register src);

 /*!
 * Convert XMM float to GPR int32(single precision) (truncate)
 */
-InstructionARM64 float_to_int32(Register dst, Register src);
+InstructionARM64 f32_to_int32(Register dst, Register src);

 InstructionARM64 nop();

@@ -75,6 +75,16 @@ constexpr Field Imm12(u32 x) {
  return Field{(static_cast<uint32_t>(x) & 0b111111111111) << 10};
 }

+constexpr Field Imm26(u32 x) {  // 26-bit immediate field at bits [25:0] (used by B <label>)
+  ASSERT(x <= ((1u << 26) - 1));  // was (2 ^ 26): '^' is XOR in C++, not exponentiation
+  return Field{(static_cast<uint32_t>(x) & 0b11111111111111111111111111) << 0};
+}
+
+constexpr Field Imm19(u32 x) {  // 19-bit immediate field at bits [23:5] (used by B.<cond>)
+  ASSERT(x <= ((1u << 19) - 1));  // was (2 ^ 19): '^' is XOR in C++, not exponentiation
+  return Field{(static_cast<uint32_t>(x) & 0b1111111111111111111) << 5};
+}
+
 constexpr Field Imms(u32 x) {
  ASSERT(x >= 0 && x <= ((2 ^ 6) - 1));
  return Field{(static_cast<uint32_t>(x) & 0b111111) << 10};
@@ -84,6 +94,11 @@ constexpr Field Immr(u32 x) {
  ASSERT(x >= 0 && x <= ((2 ^ 6) - 1));
  return Field{(static_cast<uint32_t>(x) & 0b111111) << 16};
 }
+
+constexpr Field Cond(u32 x) {  // 4-bit condition-code field at bits [3:0] (used by B.<cond>)
+  ASSERT(x <= ((1u << 4) - 1));  // was (2 ^ 4): '^' is XOR in C++, not exponentiation
+  return Field{(static_cast<uint32_t>(x) & 0b1111) << 0};
+}
 }  // namespace ARM64

 struct InstructionARM64 : InstructionImpl<InstructionARM64> {