diff --git a/decompiler/Function/Function.h b/decompiler/Function/Function.h index 65bee8cd90..b8aa7b96ff 100644 --- a/decompiler/Function/Function.h +++ b/decompiler/Function/Function.h @@ -94,7 +94,8 @@ class Function { bool run_type_analysis_ir2(const TypeSpec& my_type, DecompilerTypeSystem& dts, LinkedObjectFile& file, - const std::unordered_map>& hints); + const std::unordered_map>& hints, + const std::unordered_map& label_types); BlockTopologicalSort bb_topo_sort(); TypeSpec type; diff --git a/decompiler/Function/TypeAnalysis.cpp b/decompiler/Function/TypeAnalysis.cpp index 685bd2ce24..f2eb3b5b75 100644 --- a/decompiler/Function/TypeAnalysis.cpp +++ b/decompiler/Function/TypeAnalysis.cpp @@ -44,12 +44,15 @@ void try_apply_hints(int idx, } } // namespace -bool Function::run_type_analysis_ir2(const TypeSpec& my_type, - DecompilerTypeSystem& dts, - LinkedObjectFile& file, - const std::unordered_map>& hints) { +bool Function::run_type_analysis_ir2( + const TypeSpec& my_type, + DecompilerTypeSystem& dts, + LinkedObjectFile& file, + const std::unordered_map>& hints, + const std::unordered_map& label_types) { (void)file; ir2.env.set_type_hints(hints); + ir2.env.set_label_types(label_types); // STEP 0 - set decompiler type system settings for this function. In config we can manually // specify some settings for type propagation to reduce the strictness of type propagation. // TODO - this is kinda hacky so that it works in both unit tests and actual decompilation. 
diff --git a/decompiler/IR2/AtomicOp.cpp b/decompiler/IR2/AtomicOp.cpp index c68eb413e5..953c78e2f9 100644 --- a/decompiler/IR2/AtomicOp.cpp +++ b/decompiler/IR2/AtomicOp.cpp @@ -1064,7 +1064,7 @@ void LoadVarOp::collect_vars(VariableSet& vars) const { ///////////////////////////// IR2_BranchDelay::IR2_BranchDelay(Kind kind) : m_kind(kind) { - assert(m_kind == Kind::NOP || m_kind == Kind::NO_DELAY); + assert(m_kind == Kind::NOP || m_kind == Kind::NO_DELAY || m_kind == Kind::UNKNOWN); } IR2_BranchDelay::IR2_BranchDelay(Kind kind, Variable var0) : m_kind(kind) { @@ -1129,6 +1129,8 @@ goos::Object IR2_BranchDelay::to_form(const std::vector& labels assert(m_var[1].has_value()); return pretty_print::build_list("set!", m_var[0]->to_form(env), pretty_print::build_list("-", m_var[1]->to_form(env))); + case Kind::UNKNOWN: + return pretty_print::build_list("unknown-branch-delay!"); default: assert(false); } @@ -1237,6 +1239,78 @@ void BranchOp::collect_vars(VariableSet& vars) const { m_branch_delay.collect_vars(vars); } +///////////////////////////// +// AsmBranchOp +///////////////////////////// + +AsmBranchOp::AsmBranchOp(bool likely, + IR2_Condition condition, + int label, + std::shared_ptr branch_delay, + int my_idx) + : AtomicOp(my_idx), + m_likely(likely), + m_condition(std::move(condition)), + m_label(label), + m_branch_delay(std::move(branch_delay)) {} + +goos::Object AsmBranchOp::to_form(const std::vector& labels, + const Env& env) const { + std::vector forms; + + if (m_likely) { + forms.push_back(pretty_print::to_symbol("bl!")); + } else { + forms.push_back(pretty_print::to_symbol("b!")); + } + + forms.push_back(m_condition.to_form(labels, env)); + forms.push_back(pretty_print::to_symbol(labels.at(m_label).name)); + forms.push_back(m_branch_delay->to_form(labels, env)); + + return pretty_print::build_list(forms); +} + +bool AsmBranchOp::operator==(const AtomicOp& other) const { + if (typeid(AsmBranchOp) != typeid(other)) { + return false; + } + + auto po = 
dynamic_cast(&other); + assert(po); + return m_likely == po->m_likely && m_condition == po->m_condition && m_label == po->m_label && + *m_branch_delay == *po->m_branch_delay; +} + +bool AsmBranchOp::is_sequence_point() const { + return true; +} + +Variable AsmBranchOp::get_set_destination() const { + throw std::runtime_error("AsmBranchOp cannot be treated as a set! operation"); +} + +void AsmBranchOp::update_register_info() { + m_condition.get_regs(&m_read_regs); + m_branch_delay->update_register_info(); + for (auto x : m_branch_delay->read_regs()) { + m_read_regs.push_back(x); + } + + for (auto x : m_branch_delay->write_regs()) { + m_write_regs.push_back(x); + } + + for (auto x : m_branch_delay->clobber_regs()) { + m_clobber_regs.push_back(x); + } +} + +void AsmBranchOp::collect_vars(VariableSet& vars) const { + m_condition.collect_vars(vars); + m_branch_delay->collect_vars(vars); +} + ///////////////////////////// // SpecialOp ///////////////////////////// diff --git a/decompiler/IR2/AtomicOp.h b/decompiler/IR2/AtomicOp.h index 9c5191c9c1..a650aca9a8 100644 --- a/decompiler/IR2/AtomicOp.h +++ b/decompiler/IR2/AtomicOp.h @@ -541,6 +541,34 @@ class BranchOp : public AtomicOp { IR2_BranchDelay m_branch_delay; }; +/*! 
+ * This represents an unknown branch instruction that we think was generated from inline assembly + */ +class AsmBranchOp : public AtomicOp { + public: + AsmBranchOp(bool likely, + IR2_Condition condition, + int label, + std::shared_ptr branch_delay, + int my_idx); + goos::Object to_form(const std::vector& labels, const Env& env) const override; + bool operator==(const AtomicOp& other) const override; + bool is_sequence_point() const override; + Variable get_set_destination() const override; + FormElement* get_as_form(FormPool& pool, const Env& env) const override; + void update_register_info() override; + TypeState propagate_types_internal(const TypeState& input, + const Env& env, + DecompilerTypeSystem& dts) override; + void collect_vars(VariableSet& vars) const override; + + private: + bool m_likely = false; + IR2_Condition m_condition; + int m_label = -1; + std::shared_ptr m_branch_delay; +}; + /*! * A "special" op has no arguments. * NOP, BREAK, SUSPEND, diff --git a/decompiler/IR2/AtomicOpForm.cpp b/decompiler/IR2/AtomicOpForm.cpp index e4276738a9..d6a9a919a9 100644 --- a/decompiler/IR2/AtomicOpForm.cpp +++ b/decompiler/IR2/AtomicOpForm.cpp @@ -362,6 +362,24 @@ FormElement* LoadVarOp::get_as_form(FormPool& pool, const Env& env) const { } } + if (m_src.is_identity() && m_src.get_arg(0).is_label() && + (m_kind == Kind::FLOAT || m_kind == Kind::SIGNED) && m_size == 4) { + // try to see if we're loading a constant + auto label = env.file->labels.at(m_src.get_arg(0).label()); + auto label_name = label.name; + auto hint = env.label_types().find(label_name); + if (hint != env.label_types().end()) { + if (hint->second.is_const && hint->second.type_name == "float") { + auto word = env.file->words_by_seg.at(label.target_segment).at(label.offset / 4); + assert(word.kind == LinkedWord::PLAIN_DATA); + float value; + memcpy(&value, &word.data, 4); + auto float_elt = pool.alloc_single_element_form(nullptr, value); + return pool.alloc_element(m_dst, float_elt, true); + } + 
} + } + auto source = pool.alloc_single_element_form(nullptr, m_src, m_my_idx); auto load = pool.alloc_single_element_form(nullptr, source, m_size, m_kind); return pool.alloc_element(m_dst, load, true); @@ -403,4 +421,8 @@ FormElement* ConditionalMoveFalseOp::get_as_form(FormPool& pool, const Env&) con FormElement* FunctionEndOp::get_as_form(FormPool& pool, const Env&) const { return pool.alloc_element(this); } + +FormElement* AsmBranchOp::get_as_form(FormPool& pool, const Env&) const { + return pool.alloc_element(this); +} } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/AtomicOpTypeAnalysis.cpp b/decompiler/IR2/AtomicOpTypeAnalysis.cpp index 46cc7d0360..778a376cb4 100644 --- a/decompiler/IR2/AtomicOpTypeAnalysis.cpp +++ b/decompiler/IR2/AtomicOpTypeAnalysis.cpp @@ -142,6 +142,7 @@ TP_Type SimpleExpression::get_type(const TypeState& input, return in_type; } case Kind::FPR_TO_GPR: + return m_args[0].get_type(input, env, dts); case Kind::DIV_S: case Kind::SUB_S: case Kind::MUL_S: @@ -518,6 +519,12 @@ TP_Type LoadVarOp::get_src_type(const TypeState& input, // this could technically hide loading a different type from inside of a static basic. 
return TP_Type::make_from_ts(dts.ts.make_typespec("uint")); } + + auto label_name = env.file->labels.at(src.label()).name; + auto hint = env.label_types().find(label_name); + if (hint != env.label_types().end()) { + return TP_Type::make_from_ts(env.dts->parse_type_spec(hint->second.type_name)); + } } } @@ -869,4 +876,15 @@ void FunctionEndOp::mark_function_as_no_return_value() { m_function_has_return_value = false; } +TypeState AsmBranchOp::propagate_types_internal(const TypeState& input, + const Env&, + DecompilerTypeSystem&) { + // for now, just make everything uint + TypeState output = input; + for (auto x : m_write_regs) { + output.get(x) = TP_Type::make_from_ts("uint"); + } + return output; +} + } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/Env.h b/decompiler/IR2/Env.h index 1e132eee6a..4e559e0862 100644 --- a/decompiler/IR2/Env.h +++ b/decompiler/IR2/Env.h @@ -107,6 +107,11 @@ class Env { return m_var_names.eliminated_move_op_ids.find(op_id) != m_var_names.eliminated_move_op_ids.end(); } + const std::unordered_map& label_types() const { return m_label_types; } + + void set_label_types(const std::unordered_map& types) { + m_label_types = types; + } LinkedObjectFile* file = nullptr; DecompilerTypeSystem* dts = nullptr; @@ -129,5 +134,6 @@ class Env { std::unordered_map> m_typehints; std::unordered_map m_var_remap; + std::unordered_map m_label_types; }; } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/Form.cpp b/decompiler/IR2/Form.cpp index d2322ed4f3..76a7683c70 100644 --- a/decompiler/IR2/Form.cpp +++ b/decompiler/IR2/Form.cpp @@ -1643,6 +1643,31 @@ void ConstantTokenElement::apply_form(const std::function&) {} void ConstantTokenElement::collect_vars(VariableSet&) const {} void ConstantTokenElement::get_modified_regs(RegSet&) const {} +///////////////////////////// +// ConstantFloatElement +///////////////////////////// + +ConstantFloatElement::ConstantFloatElement(float value) : 
m_value(value) {} + +void ConstantFloatElement::apply(const std::function&) {} +void ConstantFloatElement::apply_form(const std::function&) {} +void ConstantFloatElement::collect_vars(VariableSet&) const {} +void ConstantFloatElement::get_modified_regs(RegSet&) const {} + +goos::Object ConstantFloatElement::to_form_internal(const Env&) const { + // return goos::Object::make_float(m_value); + int rounded = m_value; + bool exact_int = ((float)rounded) == m_value; + if (m_value == 0.5 || m_value == -0.5 || m_value == 0.0 || m_value == 1.0 || m_value == -1.0 || + exact_int) { + return goos::Object::make_float(m_value); + } else { + u32 value; + memcpy(&value, &m_value, 4); + return pretty_print::build_list("the-as", "float", fmt::format("#x{:x}", value)); + } +} + StorePlainDeref::StorePlainDeref(DerefElement* dst, SimpleExpression expr, int my_idx, diff --git a/decompiler/IR2/Form.h b/decompiler/IR2/Form.h index 7485f54ab8..edd3dbf06f 100644 --- a/decompiler/IR2/Form.h +++ b/decompiler/IR2/Form.h @@ -1027,7 +1027,7 @@ class StringConstantElement : public FormElement { class ConstantTokenElement : public FormElement { public: - ConstantTokenElement(const std::string& value); + explicit ConstantTokenElement(const std::string& value); goos::Object to_form_internal(const Env& env) const override; void apply(const std::function& f) override; void apply_form(const std::function& f) override; @@ -1043,6 +1043,24 @@ class ConstantTokenElement : public FormElement { std::string m_value; }; +class ConstantFloatElement : public FormElement { + public: + explicit ConstantFloatElement(float value); + goos::Object to_form_internal(const Env& env) const override; + void apply(const std::function& f) override; + void apply_form(const std::function& f) override; + void collect_vars(VariableSet& vars) const override; + void get_modified_regs(RegSet& regs) const override; + void update_from_stack(const Env& env, + FormPool& pool, + FormStack& stack, + std::vector* result, + bool 
allow_side_effects) override; + + private: + float m_value; +}; + class StorePlainDeref : public FormElement { public: StorePlainDeref(DerefElement* dst, diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index 8e07a4f77c..a671a88f2b 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -101,14 +101,21 @@ void pop_helper(const std::vector& vars, std::vector submit_reg_to_var; // build submission for stack + std::unordered_map reg_counts; + for (auto& v : vars) { + reg_counts[v.reg()]++; + } + for (size_t var_idx = 0; var_idx < vars.size(); var_idx++) { const auto& var = vars.at(var_idx); auto& ri = env.reg_use().op.at(var.idx()); RegSet consumes_to_use = consumes.value_or(ri.consumes); if (consumes_to_use.find(var.reg()) != consumes_to_use.end()) { - // we consume the register, so it's safe to try popping. - submit_reg_to_var.push_back(var_idx); - submit_regs.push_back(var.reg()); + if (reg_counts.at(var.reg()) == 1) { + // we consume the register, so it's safe to try popping. 
+ submit_reg_to_var.push_back(var_idx); + submit_regs.push_back(var.reg()); + } } } @@ -912,6 +919,15 @@ void SetVarElement::push_to_stack(const Env& env, FormPool& pool, FormStack& sta if (m_src->is_single_element()) { auto src_as_se = dynamic_cast(m_src->back()); if (src_as_se) { + if (src_as_se->expr().kind() == SimpleExpression::Kind::IDENTITY && + m_dst.reg().get_kind() == Reg::FPR && src_as_se->expr().get_arg(0).is_int() && + src_as_se->expr().get_arg(0).get_int() == 0) { + stack.push_value_to_reg(m_dst, + pool.alloc_single_element_form(nullptr, 0.0), + true, m_var_info); + return; + } + if (src_as_se->expr().kind() == SimpleExpression::Kind::IDENTITY && src_as_se->expr().get_arg(0).is_var()) { // this can happen late in the case of coloring moves which are also gpr -> fpr's @@ -946,7 +962,8 @@ void SetFormFormElement::push_to_stack(const Env&, FormPool&, FormStack& stack) } void StoreInSymbolElement::push_to_stack(const Env& env, FormPool& pool, FormStack& stack) { - auto sym = pool.alloc_single_element_form(nullptr, m_sym_name); + auto sym = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_sym_val(m_sym_name).as_expr(), m_my_idx); auto val = pool.alloc_single_element_form(nullptr, m_value, m_my_idx); val->update_children_from_stack(env, pool, stack, true); @@ -1780,6 +1797,15 @@ FormElement* ConditionElement::make_generic(const Env&, casted); } + case IR2_Condition::Kind::LEQ_ZERO_SIGNED: { + auto casted = make_cast(source_forms, types, TypeSpec("int"), pool); + auto zero = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_int_constant(0)); + casted.push_back(zero); + return pool.alloc_element(GenericOperator::make_fixed(FixedOperatorKind::LEQ), + casted); + } + case IR2_Condition::Kind::GEQ_ZERO_SIGNED: { auto casted = make_cast(source_forms, types, TypeSpec("int"), pool); auto zero = pool.alloc_single_element_form( @@ -2319,4 +2345,13 @@ void ConstantTokenElement::update_from_stack(const Env&, result->push_back(this); } +void 
ConstantFloatElement::update_from_stack(const Env&, + FormPool&, + FormStack&, + std::vector* result, + bool) { + mark_popped(); + result->push_back(this); +} + } // namespace decompiler diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp index d8300e2e12..1e5761becd 100644 --- a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -283,7 +283,8 @@ void ObjectFileDB::ir2_type_analysis_pass() { attempted_functions++; // try type analysis here. auto hints = get_config().type_hints_by_function_by_idx[func.guessed_name.to_string()]; - if (func.run_type_analysis_ir2(ts, dts, data.linked_data, hints)) { + auto label_types = get_config().label_types[data.to_unique_name()]; + if (func.run_type_analysis_ir2(ts, dts, data.linked_data, hints, label_types)) { successful_functions++; } else { func.warnings.type_prop_warning("Type analysis failed"); @@ -469,6 +470,31 @@ std::string ObjectFileDB::ir2_to_file(ObjectFileData& data) { } result += '\n'; + } else if (func.ir2.atomic_ops_succeeded) { + auto& ao = func.ir2.atomic_ops; + for (size_t i = 0; i < ao->ops.size(); i++) { + auto& op = ao->ops.at(i); + + if (!dynamic_cast(op.get())) { + auto instr_idx = ao->atomic_op_to_instruction.at(i); + + // check for a label to print + auto label_id = data.linked_data.get_label_at(seg, (func.start_word + instr_idx) * 4); + if (label_id != -1) { + result += fmt::format("(label {})\n", data.linked_data.labels.at(label_id).name); + } + // check for no misaligned labels in code segments. + for (int j = 1; j < 4; j++) { + assert(data.linked_data.get_label_at(seg, (func.start_word + instr_idx) * 4 + j) == + -1); + } + + // print assembly ops. 
+ } + + // print instruction + result += fmt::format(" {}\n", op->to_string(func.ir2.env)); + } } if (func.ir2.print_debug_forms) { diff --git a/decompiler/analysis/atomic_op_builder.cpp b/decompiler/analysis/atomic_op_builder.cpp index 8f888ee9f4..db40df6b47 100644 --- a/decompiler/analysis/atomic_op_builder.cpp +++ b/decompiler/analysis/atomic_op_builder.cpp @@ -10,6 +10,8 @@ namespace decompiler { namespace { +std::unique_ptr convert_1(const Instruction& i0, int idx); + ////////////////////// // Register Helpers ////////////////////// @@ -376,7 +378,8 @@ std::unique_ptr make_branch(const IR2_Condition& condition, if (branch_delay.is_known()) { return std::make_unique(likely, condition, dest_label, branch_delay, my_idx); } else { - return nullptr; + auto delay_op = std::shared_ptr(convert_1(delay, my_idx)); + return std::make_unique(likely, condition, dest_label, delay_op, my_idx); } } diff --git a/decompiler/config.cpp b/decompiler/config.cpp index 3235a35c78..787b03e332 100644 --- a/decompiler/config.cpp +++ b/decompiler/config.cpp @@ -125,5 +125,17 @@ void set_config(const std::string& path_to_config_file) { } } } + + auto label_types_json = read_json_file_from_config(cfg, "label_types_file"); + for (auto& kv : label_types_json.items()) { + auto& obj_name = kv.key(); + auto& types = kv.value(); + for (auto& x : types) { + const auto& name = x.at(0).get(); + const auto& type_name = x.at(1).get(); + bool is_const = x.at(2).get(); + gConfig.label_types[obj_name][name] = {type_name, is_const}; + } + } } } // namespace decompiler \ No newline at end of file diff --git a/decompiler/config.h b/decompiler/config.h index 6030f2c693..280786feee 100644 --- a/decompiler/config.h +++ b/decompiler/config.h @@ -15,6 +15,11 @@ struct TypeHint { std::string type_name; }; +struct LabelType { + std::string type_name; + bool is_const = false; +}; + struct Config { int game_version = -1; std::vector dgo_names; @@ -46,6 +51,8 @@ struct Config { anon_function_types_by_obj_by_id; 
std::unordered_map> function_arg_names; std::unordered_map> function_var_names; + + std::unordered_map> label_types; bool run_ir2 = false; }; diff --git a/decompiler/config/all-types.gc b/decompiler/config/all-types.gc index a0c8cf7be5..c336c8cd1d 100644 --- a/decompiler/config/all-types.gc +++ b/decompiler/config/all-types.gc @@ -770,11 +770,31 @@ ; (define-extern seek function) ; ;;(define-extern xyzw object) ;; unknown type ; -; ;;(define-extern *random-generator* object) ;; unknown type -; -; ;;(define-extern rgba object) ;; unknown type +(define-extern *random-generator* random-generator) ;; unknown type ; +(deftype rgba (uint32) + ((r uint8 :offset 0) + (g uint8 :offset 8) + (b uint8 :offset 16) + (a uint8 :offset 24) + ) + :flag-assert #x900000004 + ) + +;; TODO: fields +(deftype xyzw (uint128) + () + :flag-assert #x900000010 + ) + +;; TODO: fields +(deftype xyzwh (uint128) + () + :flag-assert #x900000010 + ) + + ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1434,7 +1454,7 @@ (define-extern matrix-translate! function) (define-extern matrix-4x4-inverse! function) -(define-extern vector-sincos! function) + (define-extern trs-matrix-calc! function) (define-extern transform-matrix-parent-calc! function) (define-extern transform-matrix-calc! 
function) @@ -1453,6 +1473,81 @@ (define-extern eul->quat function) (define-extern matrix->eul function) +;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; TRIGONOMETRY ;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~; + +(deftype float-type (uint32) + () + ) + +(define-extern radmod (function float float)) +(define-extern deg- (function float float float)) +(define-extern deg-diff (function float float float)) +(define-extern deg-seek (function float float float float)) +(define-extern deg-seek-smooth (function float float float float float)) +(define-extern deg-lerp-clamp (function float float float float)) +(define-extern sin (function float float)) +(define-extern sin-rad (function float float)) +(define-extern *sin-poly-vec* vector) +(define-extern *sin-poly-vec2* vector) +(define-extern vector-sin-rad! (function vector vector vector)) +(define-extern cos-rad (function float float)) +(define-extern *cos-poly-vec* vector) +(define-extern vector-cos-rad! (function vector vector vector)) +(define-extern vector-sincos-rad! (function vector vector vector int)) +(define-extern vector-sincos! (function vector vector vector int)) +(define-extern tan-rad (function float float)) +(define-extern vector-rad<-vector-deg! (function vector vector none)) +(define-extern vector-rad<-vector-deg/2! 
(function vector vector int)) +(define-extern atan0 (function float float float)) +(define-extern atan-series-rad (function float float)) +(define-extern atan-rad (function float float)) +(define-extern atan2-rad (function float float float)) +(define-extern acos-rad (function float float)) +(define-extern acos (function float float)) + +(define-extern coserp (function float float float float)) +(define-extern sinerp-clamp (function float float float float)) +;;(define-extern exp-slead object) ;; unknown type +(define-extern coserp180-clamp (function float float float float)) + ;; unknown type +(define-extern exp (function float float)) + + + +(define-extern deg-seek (function float float float float)) +(define-extern coserp180 (function float float float float)) +(define-extern sign (function float float)) +(define-extern sinerp (function float float float float)) +;; unknown type +(define-extern ease-in-out (function int int float)) + + +(define-extern asin (function float float)) +;;(define-extern sincos-table object) ;; unknown type +;;(define-extern exp-strail object) ;; unknown type + +(define-extern coserp-clamp (function float float float float)) +(define-extern tan (function float float)) + +;; ;; unknown type + +;;(define-extern binary-table object) ;; unknown type + +(define-extern sincos! (function (pointer float) float int)) +(define-extern sincos-rad! 
(function (pointer float) float int)) + + + +;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; GSOUND-H ;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~; + ;(define-extern sound-rpc-set-falloff-curve object) ;; unknown type ;;(define-extern *sound-bank-2* object) ;; unknown type @@ -31720,8 +31815,8 @@ ;;(define-extern transformq object) ;; unknown type ;;(define-extern trsqv object) ;; unknown type ;;(define-extern trsq object) ;; unknown type -(define-extern deg-diff function) -(define-extern vector-y-angle function) + +(define-extern vector-y-angle (function vector float)) (define-extern quaternion-zero! function) @@ -31775,18 +31870,13 @@ (define-extern vector-y-quaternion! function) (define-extern vector-rotate-y! function) (define-extern quaternion-i! function) -(define-extern sincos-rad! function) -(define-extern vector-sincos-rad! function) -(define-extern atan-series-rad function) -(define-extern atan2-rad function) + + + (define-extern quaternion-from-two-vectors-max-angle! function) -(define-extern vector-rad<-vector-deg/2! function) -(define-extern vector-sin-rad! function) -(define-extern acos-rad function) -(define-extern acos function) (define-extern vector-reflect-flat! function) (define-extern forward-down->inv-matrix function) (define-extern circle-test function) @@ -31830,38 +31920,7 @@ (define-extern vector-reflect-flat-above! 
function) (define-extern vector-circle-tangent-new function) -(define-extern coserp function) -(define-extern sinerp-clamp function) -;;(define-extern exp-slead object) ;; unknown type -(define-extern coserp180-clamp function) -;;(define-extern *sin-poly-vec2* object) ;; unknown type -(define-extern exp function) -(define-extern deg-lerp-clamp function) -(define-extern deg- function) -(define-extern atan-rad function) -(define-extern deg-seek function) -(define-extern coserp180 function) -(define-extern sign function) -(define-extern sinerp function) -;;(define-extern *sin-poly-vec* object) ;; unknown type -(define-extern ease-in-out function) -(define-extern cos-rad function) -(define-extern atan0 function) -(define-extern asin function) -(define-extern vector-cos-rad! function) -;;(define-extern sincos-table object) ;; unknown type -;;(define-extern exp-strail object) ;; unknown type -(define-extern tan-rad function) -(define-extern coserp-clamp function) -(define-extern tan function) -(define-extern sin-rad function) -;;(define-extern float-type object) ;; unknown type -(define-extern deg-seek-smooth function) -;;(define-extern binary-table object) ;; unknown type -;;(define-extern *cos-poly-vec* object) ;; unknown type -(define-extern sincos! function) -(define-extern vector-rad<-vector-deg! 
function) -(define-extern radmod function) + ; ;;(define-extern vif-stat object) ;; unknown type ;;(define-extern vif-fbrst object) ;; unknown type diff --git a/decompiler/config/jak1_ntsc_black_label/label_types.jsonc b/decompiler/config/jak1_ntsc_black_label/label_types.jsonc new file mode 100644 index 0000000000..6308f8228a --- /dev/null +++ b/decompiler/config/jak1_ntsc_black_label/label_types.jsonc @@ -0,0 +1,32 @@ +{ + "math":[ + ["L41", "float", true], + ["L34", "float", true], + ["L35", "float", true] + ], + + "trigonometry":[ + ["L143", "float", true], + ["L144", "float", true], + ["L145", "float", true], + ["L137", "float", true], + ["L106", "float", true], + ["L134", "float", true], + ["L112", "float", true], + ["L114", "float", true], + ["L135", "float", true], + ["L121", "float", true], + ["L150", "float", true], + ["L147", "float", true], + ["L149", "float", true], + ["L107", "float", true], + ["L129", "float", true], + ["L152", "float", true], + ["L109", "float", true], + ["L138", "float", true], + ["L127", "float", true], + ["L128", "float", true], + ["L110", "float", true], + ["L136", "float", true] + ] +} \ No newline at end of file diff --git a/decompiler/config/jak1_ntsc_black_label/var_names.jsonc b/decompiler/config/jak1_ntsc_black_label/var_names.jsonc index 2272db7820..e1151c9667 100644 --- a/decompiler/config/jak1_ntsc_black_label/var_names.jsonc +++ b/decompiler/config/jak1_ntsc_black_label/var_names.jsonc @@ -345,7 +345,41 @@ "previous-brother":{ "args":["proc"], "vars":{"v1-0":"parent", "v1-2":"child"} + }, + + "deg-seek":{ + "args":["in", "target", "max-diff"], + "vars":{"v1-1":"in-int", "a0-2":"target-int", "a1-2":"max-diff-int", "a2-1":"diff", "a3-0":"abs-diff"} + }, + + "deg-seek-smooth":{ + "args":["in", "target", "max-diff", "amount"], + "vars":{"f0-1":"step"} + }, + + "deg-lerp-clamp":{ + "args":["min-val", "max-val", "in"] + }, + + "sinerp-clamp":{ + "args":["minimum", "maximum", "amount"] + }, + + "coserp-clamp":{ + 
"args":["minimum", "maximum", "amount"] + }, + "coserp":{ + "args":["minimum", "maximum", "amount"] + }, + + "coserp180-clamp":{ + "args":["minimum", "maximum", "amount"] + }, + "coserp180":{ + "args":["minimum", "maximum", "amount"] + }, + "ease-in-out":{ + "args":["total", "progress"] } - } \ No newline at end of file diff --git a/doc/code_status.md b/doc/code_status.md index 0ffaadeb35..966634dc3c 100644 --- a/doc/code_status.md +++ b/doc/code_status.md @@ -27,5 +27,61 @@ ## `dgo-h`: **Done** - Just type definitions. These don't seem to match the version of DGO files found in the game, so maybe this is outdated? Also GOAL never sees DGOs, they are always processed on the IOP. -## `gstate`: +## `gstate`: **Done** - Doing a `go` from a non-main thread of the process that is changing state is implemented a tiny bit differently. I don't think it should matter. + +# ENGINE + +## `types-h`: **Done** +- Just some bitfield types. + +## `vu1-macros`: **Done** +- Empty + +## `math`: **Done** +- The VU random generator has been rewritten, it used the PS2's (very bad) random hardware +- The "31 bit" integer random generator was rewritten, it used very strange inline assembly. + +## `vector-h`: **Done** +- Has some very simple, manually rewritten VU functions + +## `gravity-h`: **Done** +- Empty + +## `bounding-box-h`: **Done** + +## `matrix-h`: **Done** +- `matrix-copy!` is a good example of where the OpenGOAL compiler's register allocator does poorly. 
+ +## `quaternion-h`: **Done** +- No comments + +## `euler-h`: **Done** +- Uses boxed arrays + +## `transform-h`: **Done** +- No comments + +## `geometry-h`: **Done** +- No comments + +## `trigonometry-h`: **Done** +- Empty + +## `transformq-h`: +- Needs stack stuff + +## `bounding-box`: + +## `matrix`: + +## `transform`: + +## `quaternion`: + +## `euler`: + +## `geometry`: + +## `trigonometry`: **Done** +- `sincos!` and `sincos-rad!` have a bug where cosine is slightly off diff --git a/doc/goal_doc.md b/doc/goal_doc.md index ee9df6e599..6a44210ce0 100644 --- a/doc/goal_doc.md +++ b/doc/goal_doc.md @@ -1419,6 +1419,23 @@ The outer product is computed like so (only x,y,z components are operated on): ``` Wrapper around `vblendps` (VEX xmm128 version) instruction. The `mask` must evaluate to a constant integer at compile time. The integer must be in the range of 0-15. +## `.itof.vf` and `.ftoi.vf` +``` +(.itof.vf dst src [:mask mask-val] [:color #t|#f]) +(.ftoi.vf dst src [:mask mask-val] [:color #t|#f]) +``` + +Wrapper around `vcvtdq2ps` and `vcvtps2dq` to convert packed 32-bit signed integers to packed 32-bit floats and back. The `mask` and `color` arguments behave like other assembly operations. + +## `.pw.sra`, `.pw.srl`, and `.pw.sll` +``` +(.pw.sra dst src shift-amount [:mask mask-val] [:color #t|#f]) +(.pw.srl dst src shift-amount [:mask mask-val] [:color #t|#f]) +(.pw.sll dst src shift-amount [:mask mask-val] [:color #t|#f]) +``` + +Wrapper around `vpsrad`, `vpsrld`, and `vpslld`, respectively. Shifts each of the four 32-bit integers in the register. 
+ # Compiler Forms - Unsorted ## `let` diff --git a/game/kernel/asm_funcs.asm b/game/kernel/asm_funcs.asm index 94298f747f..ffe12d3852 100644 --- a/game/kernel/asm_funcs.asm +++ b/game/kernel/asm_funcs.asm @@ -14,6 +14,17 @@ _stack_call_linux: pop rax ; align stack sub rsp, 8 + + sub rsp, 128 + movaps [rsp], xmm8 + movaps [rsp + 16], xmm9 + movaps [rsp + 32], xmm10 + movaps [rsp + 48], xmm11 + movaps [rsp + 64], xmm12 + movaps [rsp + 80], xmm13 + movaps [rsp + 96], xmm14 + movaps [rsp + 112], xmm15 + ; create stack array of arguments push r11 push r10 @@ -23,6 +34,7 @@ _stack_call_linux: push rdx push rsi push rdi + ; set first argument mov rdi, rsp ; call function @@ -36,6 +48,17 @@ _stack_call_linux: pop r9 pop r10 pop r11 + + movaps xmm8, [rsp] + movaps xmm9, [rsp + 16] + movaps xmm10, [rsp + 32] + movaps xmm11, [rsp + 48] + movaps xmm12, [rsp + 64] + movaps xmm13, [rsp + 80] + movaps xmm14, [rsp + 96] + movaps xmm15, [rsp + 112] + add rsp, 128 + ; restore stack add rsp, 8 ; return! @@ -48,6 +71,16 @@ _stack_call_win32: ; to make sure the stack frame is aligned sub rsp, 8 + sub rsp, 128 + movaps [rsp], xmm8 + movaps [rsp + 16], xmm9 + movaps [rsp + 32], xmm10 + movaps [rsp + 48], xmm11 + movaps [rsp + 64], xmm12 + movaps [rsp + 80], xmm13 + movaps [rsp + 96], xmm14 + movaps [rsp + 112], xmm15 + ; push all registers and create the register array on the stack push r11 push r10 @@ -76,6 +109,17 @@ _stack_call_win32: pop r9 pop r10 pop r11 + + movaps xmm8, [rsp] + movaps xmm9, [rsp + 16] + movaps xmm10, [rsp + 32] + movaps xmm11, [rsp + 48] + movaps xmm12, [rsp + 64] + movaps xmm13, [rsp + 80] + movaps xmm14, [rsp + 96] + movaps xmm15, [rsp + 112] + add rsp, 128 + add rsp, 8 ret diff --git a/goal_src/engine/math/matrix-h.gc b/goal_src/engine/math/matrix-h.gc index d60b9fe038..b13d134cdc 100644 --- a/goal_src/engine/math/matrix-h.gc +++ b/goal_src/engine/math/matrix-h.gc @@ -42,8 +42,19 @@ (defun matrix-copy! ((dst matrix) (src matrix)) "Copy src to dst." 
- ;; actual implementation is in assembly, unrolled quad copies, loads/stores spaced out. - (dotimes (i 16 dst) - (set! (-> dst data i) (-> src data i)) + (rlet ((r0 :class vf) + (r1 :class vf) + (r2 :class vf) + (r3 :class vf) + ) + (.lvf r0 (&-> src quad 0)) + (.lvf r1 (&-> src quad 1)) + (.lvf r2 (&-> src quad 2)) + (.lvf r3 (&-> src quad 3)) + (.svf (&-> dst quad 0) r0) + (.svf (&-> dst quad 1) r1) + (.svf (&-> dst quad 2) r2) + (.svf (&-> dst quad 3) r3) ) - ) \ No newline at end of file + dst + ) diff --git a/goal_src/engine/math/trigonometry.gc b/goal_src/engine/math/trigonometry.gc index a6308e1d3d..8b0b8e8867 100644 --- a/goal_src/engine/math/trigonometry.gc +++ b/goal_src/engine/math/trigonometry.gc @@ -5,3 +5,1381 @@ ;; name in dgo: trigonometry ;; dgos: GAME, ENGINE +;; The "rotation" unit stores an angle in a float, where 1.0 = 1/65,536 (1/2^16) of a rotation. +;; Use the ~r format specifier to print rotations as degrees. +;; In general, functions which use these units will only be accurate to within 1/65,536th of a rotation, +;; as they often internally convert the float to an integer. These functions also handle wrapping +;; correctly, and will output angles in the range -32768 to 32768 (+/- one half of a rotation) +;; Functions with these units have deg or nothing special in the name. + +;; Some functions use radians. These typically have rad in the name, and they don't handle wrapping. +;; The input must be in the range -pi to pi + +;; General note on floating point constants: to avoid ambiguity/rounding issues related to printing/parsing, weird +;; constants are stored as hex. Commonly used constants that are exactly represented (1, 0.5, etc) will appear +;; normally. + + +;; There is a bug in some of the cosine functions that can be fixed by toggling this flag.
+(defglobalconstant FIX_COSINE_BUG #f) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Floating Point Constants +;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defconstant ROT_TO_RAD (the-as float #x38c90fda)) +(defconstant PI (the-as float #x40490fda)) +(defconstant MINUS_PI (the-as float #xc0490fda)) +(defconstant PI_OVER_2 (the-as float #x3fc90fda)) +(defconstant TWO_PI (the-as float #x40c90fda)) + +;; convert number of degrees to a rotation. +(defmacro degrees (value) + `(* (/ (the float ,value) 360.0) + 65536.0 + ) + ) + +(defun radmod ((arg0 float)) + "Wrap arg0 to be within (-pi, pi)." + (local-vars (f0-1 float)) + (set! f0-1 (+ PI arg0)) + (if (< 0.0 f0-1) + (+ MINUS_PI (- f0-1 (* (the float (the int (/ f0-1 TWO_PI))) TWO_PI))) + (+ PI (- f0-1 (* (the float (the int (/ f0-1 TWO_PI))) TWO_PI))) + ) + ) + + +(defun deg- ((arg0 float) (arg1 float)) + "Compute arg0-arg1, unwrapped, using rotation units. + Result should be in the range (-180, 180)" + (the float (sar (- (shl (the int arg0) 48) + (shl (the int arg1) 48)) + 48) + ) + ) + +(defun deg-diff ((arg0 float) (arg1 float)) + "Very similar to the function above, but computes arg1 - arg0 instead." + (the float (sar (- (shl (the int arg1) 48) + (shl (the int arg0) 48)) + 48) + ) + ) + +(defun deg-seek ((in float) (target float) (max-diff float)) + "Move in toward target by at most max-diff, using rotation units" + (local-vars + (in-int int) + (target-int int) + (max-diff-int int) + (diff int) + (abs-diff int) + ) + (set! in-int (shl (the int in) 48)) + (set! target-int (shl (the int target) 48)) + (set! max-diff-int (shl (the int max-diff) 48)) + (set! diff (- target-int in-int)) + (set! abs-diff (abs diff)) + (the float + (sar (cond + ((< abs-diff 0) ;; ?? 
+ (+ in-int max-diff-int) + ) + ((>= max-diff-int abs-diff) + target-int + ) + ((>= diff 0) + (+ in-int max-diff-int) + ) + (else + (- in-int max-diff-int) + ) + ) + 48 + ) + ) + ) + +(defun deg-seek-smooth ((in float) (target float) (max-diff float) (amount float)) + "Step amount of the way from in to target, by at most max-diff, using rotation units" + (local-vars (step float)) + ;; how much we want to go + (set! step (* (deg- target in) amount)) + ;; can we make it in one go? + (if (< max-diff (fabs step)) + ;; nope, saturate. + (if (>= step 0.00000000) + (set! step max-diff) + (set! step (- max-diff)) + ) + ) + (+ in step) + ) + +(defun deg-lerp-clamp ((min-val float) (max-val float) (in float)) + "Map [0, 1] to min-val, max-val, handling wrapping and saturating, using rotation units." + (cond + ((>= 0.00000000 in) + min-val + ) + ((>= in 1.00000000) + max-val + ) + (else + (the float + (sar (shl (the int (+ min-val + (* in (deg-diff min-val max-val)))) + 48) + 48) + ) + ) + ) + ) + +;; create a static array with the given values, interpreted as floats. +(defmacro make-float-table (name size vals) + `(define ,name (new 'static 'boxed-array float ,size + ,@(apply (lambda (x) `(the-as float ,x)) vals))) + ) + +;; table[x] = 1/(2^x) +;; unused? +(make-float-table + binary-table + 32 + (#x3f800000 ;; 1.0 + #x3f000000 ;; 0.5 + #x3e800000 ;; 0.25 + #x3e000000 ;; ... + #x3d800000 + #x3d000000 + #x3c800000 + #x3c000000 + #x3b800000 + #x3b000000 + #x3a800000 + #x3a000000 + #x39800000 + #x39000000 + #x38800000 + #x38000000 + #x37800000 + #x37000000 + #x36800000 + #x36000000 + #x35800000 + #x35000000 + #x34800000 + #x34000000 + #x33800000 + #x33000000 + #x32800000 + #x32000000 + #x31800000 + #x31000000 + #x30800000 + #x30000000 + ) + ) + +;; not sure what these values are yet. +;; unused. 
+(make-float-table + sincos-table + 32 + (#x3f490fdb + #x3eed6338 + #x3e7adbb0 + #x3dfeadd5 + #x3d7faade + #x3cffeaae + #x3c7ffaab + #x3bfffeab + #x3b7fffab + #x3affffeb + #x3a7ffffb + #x39fffffe + #x39800000 + #x39000000 + #x38800000 + #x38000000 + #x37800000 + #x37000000 + #x36800000 + #x36000000 + #x35800000 + #x35000000 + #x34800000 + #x34000000 + #x33800000 + #x33000000 + #x32800000 + #x32000000 + #x31800000 + #x31000000 + #x30800000 + #x30000000 + ) + ) + +(defun sin ((arg0 float)) + "Compute the sine of an angle in rotation units. Unwraps it." + (local-vars + (f0-1 float) + (f0-3 float) + (f0-4 float) + (f1-4 float) + (f1-5 float) + (f1-6 float) + (f1-7 float) + (f2-0 float) + (f2-1 float) + (f2-2 float) + (f2-3 float) + ) + (set! f2-0 (* ROT_TO_RAD (the float (sar (shl (the int arg0) 48) 48)))) + (set! f0-1 f2-0) + (set! f1-4 (* (the-as float #x3f7fffde) f2-0)) + (set! f0-3 (* f2-0 f2-0)) + (set! f2-1 (* f2-0 f0-3)) + (set! f1-5 (+ f1-4 (* (the-as float #xbe2aa8f5) f2-1))) + (set! f2-2 (* f2-1 f0-3)) + (set! f1-6 (+ f1-5 (* (the-as float #x3c086bf6) f2-2))) + (set! f2-3 (* f2-2 f0-3)) + (set! f1-7 (+ f1-6 (* (the-as float #xb94d2072) f2-3))) + (set! f0-4 (* f2-3 f0-3)) + (+ f1-7 (* (the-as float #x361aa27f) f0-4)) + ) + +(defun sin-rad ((arg0 float)) + "Compute the sine of an angle in radians. + No unwrap is done, should be in -pi, pi" + (local-vars + (f1-0 float) + (f2-0 float) + (f3-0 float) + (f4-0 float) + (f5-0 float) + (f6-0 float) + (f7-0 float) + (f8-0 float) + (f9-0 float) + (f10-0 float) + (f11-0 float) + (acc float) + ) + (set! f1-0 (* arg0 arg0)) + (set! f7-0 (the-as float #x3f7fffde)) + (set! f8-0 (the-as float #xbe2aa8f5)) + (set! f2-0 (* arg0 f1-0)) + (set! f3-0 (* f1-0 f1-0)) + (set! f9-0 (the-as float #x3c086bf6)) + (set! f4-0 (* f2-0 f1-0)) + (set! f5-0 (* f3-0 f2-0)) + (set! f10-0 (the-as float #xb94d2072)) + (set! f6-0 (* f4-0 f3-0)) + (set! f11-0 (the-as float #x361aa27f)) + ;;(.mula.s arg0 f7-0) + (set! 
acc (* arg0 f7-0)) + ;;(.madda.s f2-0 f8-0) + (set! acc (+ acc (* f2-0 f8-0))) + ;;(.madda.s f4-0 f9-0) + (set! acc (+ acc (* f4-0 f9-0))) + ;;(.madda.s f5-0 f10-0) + (set! acc (+ acc (* f5-0 f10-0))) + ;;(.madd.s f12-0 f6-0 f11-0) + (+ acc (* f6-0 f11-0)) + ) + +;; taylor series coefficients for sine approximation +(define *sin-poly-vec* (new 'static 'vector + :x (the-as float #xbe2aa8f5) ;; -1/3! + :y (the-as float #x3c086bf6) ;; 1/5! + :z (the-as float #xb94d2072) ;; -1/7! + :w (the-as float #x361aa27f) ;; 1/9? + ) + ) + +;; +(define *sin-poly-vec2* (new 'static 'vector + :x (the-as float #x3f7fffde) + :y 0.0 + :z 0.0 + :w 0.0 + ) + ) + +(defun vector-sin-rad! ((dst vector) (src vector)) + "Taylor series approximation of sine on all 4 elements in a vector. + Inputs should be in radians, in -pi to pi. + Somehow their coefficients are a little bit off. + Like the first coefficient, which should obviously be 1, is not quite 1." + (rlet ((vf1 :class vf) + (vf2 :class vf) + (vf3 :class vf) + (vf4 :class vf) + (vf5 :class vf) + (vf6 :class vf) + (vf7 :class vf) + (vf8 :class vf) + (vf9 :class vf) + (vf10 :class vf) + (acc :class vf)) + ;; (.lqc2 vf1 0 arg1) + (.lvf vf1 src) + ;; (.vmul.xyzw vf3 vf1 vf1) + (.mul.vf vf3 vf1 vf1) + ;; (set! v1-0 *sin-poly-vec2*) + ;; (.lqc2 vf10 0 v1-0) + (.lvf vf10 *sin-poly-vec2*) + ;; (set! 
v1-1 *sin-poly-vec*) + ;; (.lqc2 vf9 0 v1-1) + (.lvf vf9 *sin-poly-vec*) + ;; (.vmul.xyzw vf4 vf3 vf1) + (.mul.vf vf4 vf3 vf1) + ;; (.vmul.xyzw vf5 vf3 vf3) + (.mul.vf vf5 vf3 vf3) + ;; (.vmulax.xyzw acc vf1 vf10) + (.mul.x.vf acc vf1 vf10) + ;; (.vmul.xyzw vf6 vf4 vf3) + (.mul.vf vf6 vf4 vf3) + ;; (.vmul.xyzw vf7 vf5 vf4) + (.mul.vf vf7 vf5 vf4) + ;; (.vmaddax.xyzw acc vf4 vf9) + (.add.mul.x.vf acc vf4 vf9 acc) + ;; (.vmul.xyzw vf8 vf6 vf5) + (.mul.vf vf8 vf6 vf5) + ;; (.vmadday.xyzw acc vf6 vf9) + (.add.mul.y.vf acc vf6 vf9 acc) + ;; (.vmaddaz.xyzw acc vf7 vf9) + (.add.mul.z.vf acc vf7 vf9 acc) + ;; (.vmaddw.xyzw vf2 vf8 vf9) + (.add.mul.w.vf vf2 vf8 vf9 acc) + ;; (.sqc2 vf2 0 arg0) + (.svf dst vf2) + dst + ;; arg0 + ) + ) + +(defun cos-rad ((arg0 float)) + "Cosine with taylor series. Input is in radians, in -pi, pi. + - TODO constants" + (local-vars + (f1-0 float) + (f3-0 float) + (f4-0 float) + (f5-0 float) + (f7-0 float) + (f8-0 float) + (f9-0 float) + (f10-0 float) + (f11-0 float) + (acc float) + ) + (set! f1-0 (* arg0 arg0)) + (set! f7-0 1.000000) + (set! f8-0 (the-as float #xbefffd62)) + (set! f3-0 (* f1-0 f1-0)) + (set! f9-0 (the-as float #x3d2a7a28)) + (set! f10-0 (the-as float #xbab2bc31)) + (set! f4-0 (* f3-0 f1-0)) + (set! f5-0 (* f3-0 f3-0)) + (set! f11-0 (the-as float #x37a933eb)) + ;;(.mula.s f7-0 f7-0) + (set! acc (* f7-0 f7-0)) + ;;(.madda.s f8-0 f1-0) + (set! acc (+ acc (* f8-0 f1-0))) + ;;(.madda.s f9-0 f3-0) + (set! acc (+ acc (* f9-0 f3-0))) + ;;(.madda.s f10-0 f4-0) + (set! acc (+ acc (* f10-0 f4-0))) + ;;(.madd.s f12-0 f11-0 f5-0) + (+ acc (* f11-0 f5-0)) + ;;(the-as float f12-0) + ) + + +(define *cos-poly-vec* + (new 'static 'vector + :x (the-as float #xbefffd62) + :y (the-as float #x3d2a7a28) + :z (the-as float #xbab2bc31) + :w (the-as float #x37a933eb) + ) + ) + +(defun vector-cos-rad! ((dst vector) (src vector)) + "Compute the cosine of all 4 vector elements. + Radians, with no wrapping. Uses taylor series with 4 coefficients." 
+ (rlet ((vf0 :class vf) ;; 0,0,0,1 + (vf1 :class vf) ;; src + (vf2 :class vf) ;; + (vf3 :class vf) ;; src^2's + (vf4 :class vf) + (vf5 :class vf) + (vf6 :class vf) + (vf9 :class vf) ;; coeffs + (acc :class vf)) + (.lvf vf0 (new 'static 'vector :x 0.0 :y 0.0 :z 0.0 :w 1.0)) + ;;(.lqc2 vf1 0 arg1) + (.lvf vf1 src) + ;;(.vsub.xyzw vf2 vf2 vf2) + (.xor.vf vf2 vf2 vf2) + ;;(set! v1-0 *cos-poly-vec*) + ;;(.lqc2 vf9 0 v1-0) + (.lvf vf9 *cos-poly-vec*) + ;;(.vmul.xyzw vf3 vf1 vf1) + (.mul.vf vf3 vf1 vf1) ;; squareds + ;;(.vaddaw.xyzw acc vf2 vf0) + (.add.w.vf acc vf2 vf0) + ;;(.vmul.xyzw vf4 vf3 vf3) + (.mul.vf vf4 vf3 vf3) + ;;(.vmaddax.xyzw acc vf3 vf9) + (.add.mul.x.vf acc vf3 vf9 acc) + ;;(.vmul.xyzw vf5 vf4 vf3) + (.mul.vf vf5 vf4 vf3) + ;;(.vmadday.xyzw acc vf4 vf9) + (.add.mul.y.vf acc vf4 vf9 acc) + ;;(.vmul.xyzw vf6 vf4 vf4) + (.mul.vf vf6 vf4 vf4) + ;;(.vmaddaz.xyzw acc vf5 vf9) + (.add.mul.z.vf acc vf5 vf9 acc) + ;;(.vmaddw.xyzw vf2 vf6 vf9) + (.add.mul.w.vf vf2 vf6 vf9 acc) + ;;(.sqc2 vf2 0 arg0) + (.svf dst vf2) + dst + ) + ) + + +(defun vector-sincos-rad! ((dst-sin vector) (dst-cos vector) (src vector)) + "Compute the sine and cosine of each element of src, storing it in dst-sin and dst-cos. + This is more efficient than separate calls to sin and cos. + Inputs should be radians in -pi to pi." + (rlet ((vf0 :class vf) + (vf1 :class vf) + (vf2 :class vf) + (vf3 :class vf) + (vf4 :class vf) + (vf5 :class vf) + (vf6 :class vf) + (vf7 :class vf) + (vf8 :class vf) + (vf9 :class vf) + (vf10 :class vf) + (vf11 :class vf) + (vf12 :class vf) + (vf13 :class vf) + (vf14 :class vf) + (acc :class vf) + ) + (.lvf vf0 (new 'static 'vector :x 0.0 :y 0.0 :z 0.0 :w 1.0)) + ;; (.lqc2 vf1 0 arg2) + (.lvf vf1 src) + ;; (.vsub.xyzw vf14 vf14 vf14) + (.xor.vf vf14 vf14 vf14) + ;; (set! v1-0 *sin-poly-vec2*) + ;; (.lqc2 vf11 0 v1-0) + (.lvf vf11 *sin-poly-vec2*) + ;; (.vmul.xyzw vf2 vf1 vf1) + (.mul.vf vf2 vf1 vf1) + ;; (set! 
v1-1 *sin-poly-vec*) + ;; (.lqc2 vf10 0 v1-1) + (.lvf vf10 *sin-poly-vec*) + ;; (set! v1-2 *cos-poly-vec*) + ;; (.lqc2 vf13 0 v1-2) + (.lvf vf13 *cos-poly-vec*) + ;; (.vmulax.xyzw acc vf1 vf11) + (.mul.x.vf acc vf1 vf11) + ;; (.vmul.xyzw vf3 vf2 vf1) + (.mul.vf vf3 vf2 vf1) + ;; (.vmul.xyzw vf4 vf2 vf2) + (.mul.vf vf4 vf2 vf2) + ;; (.vmul.xyzw vf5 vf3 vf2) + (.mul.vf vf5 vf3 vf2) + ;; (.vmul.xyzw vf6 vf3 vf3) + (.mul.vf vf6 vf3 vf3) + ;; (.vmul.xyzw vf7 vf4 vf3) + (.mul.vf vf7 vf4 vf3) + ;; (.vmul.xyzw vf8 vf4 vf4) + (.mul.vf vf8 vf4 vf4) + ;; (.vmul.xyzw vf9 vf5 vf4) + (.mul.vf vf9 vf5 vf4) + ;; (.vmaddax.xyzw acc vf3 vf10) + (.add.mul.x.vf acc vf3 vf10 acc) + ;; (.vmadday.xyzw acc vf5 vf10) + (.add.mul.y.vf acc vf5 vf10 acc) + ;; (.vmaddaz.xyzw acc vf7 vf10) + (.add.mul.z.vf acc vf7 vf10 acc) + ;; (.vmaddw.xyzw vf12 vf9 vf10) + (.add.mul.w.vf vf12 vf9 vf10 acc) + ;; (.vaddaw.xyzw acc vf14 vf0) + (.add.w.vf acc vf14 vf0) + ;; (.vmaddax.xyzw acc vf2 vf13) + (.add.mul.x.vf acc vf2 vf13 acc) + ;; (.vmadday.xyzw acc vf4 vf13) + (.add.mul.y.vf acc vf4 vf13 acc) + ;; (.vmaddaz.xyzw acc vf6 vf13) + (.add.mul.z.vf acc vf6 vf13 acc) + ;; (.vmaddw.xyzw vf14 vf8 vf13) + (.add.mul.w.vf vf14 vf8 vf13 acc) + ;; (.sqc2 vf12 0 arg0) + (.svf dst-sin vf12) + ;; (.sqc2 vf14 0 arg1) + (.svf dst-cos vf14) + ;; (set! v0-0 0) + 0 + ) + ) + +(defmacro sincos-rad-asm (out x) + ;; Compute the sine and cosine of x, store it in the output array + ;; this assembly is shared in two functions. + `(rlet ((f10 :class fpr :type float) ;; coeff 1.0 + (f11 :class fpr :type float) ;; coeff -1/3! + (f12 :class fpr :type float) ;; coeff 1/5! + (f14 :class fpr :type float) ;; coeff -1/7! + (f15 :class fpr :type float) ;; coeff 1/9! 
+ (f1 :class fpr :type float) ;; x + (f2 :class fpr :type float) ;; x^2 + (f3 :class fpr :type float) ;; x^3 + (f4 :class fpr :type float) ;; x^4 + (f5 :class fpr :type float) ;; x^5 + (f6 :class fpr :type float) ;; x^6 + (f7 :class fpr :type float) ;; x^7 + (f8 :class fpr :type float) ;; x^8 + (f9 :class fpr :type float) ;; x^9 + (f21 :class fpr :type float) + (f22 :class fpr :type float) ;; 0 ? + (acc :class fpr :type float) ;; temp + (f16 :class fpr :type float) ;; 1.0 + (f17 :class fpr :type float) ;; cos coeff 1 + (f18 :class fpr :type float) ;; cos coeff 2 + (f19 :class fpr :type float) ;; cos coeff 3 + (f20 :class fpr :type float) ;; cos coeff 4 + ) + ;; lui v1, 16255 + ;; lui a2, -16854 + ;; ori v1, v1, 65502 + ;; mtc1 f1, a1 + (set! f1 ,x) + ;; ori a1, a2, 43253 + ;; sub.s f22, f22, f22 + (set! f22 (the-as float 0)) + ;; lui a2, 15368 + ;; mtc1 f10, v1 + (set! f10 (the-as float #x3F7FFFDE)) ;; almost 1.0 + ;; ori v1, a2, 27638 + ;; mtc1 f11, a1 + (set! f11 (the-as float #xBE2AA8F5)) ;; -0.166, 1/3! + ;; lui a1, -18099 + ;; mul.s f2, f1, f1 + (set! f2 (* f1 f1)) + ;; ori a1, a1, 8306 + ;; mtc1 f12, v1 + (set! f12 (the-as float #x3C086BF6)) ;; 1/5! + ;; lui v1, 13850 + ;; mtc1 f14, a1 + (set! f14 (the-as float #xB94D2072)) ;; 1/7! + ;; ori a1, v1, 41599 + ;; mula.s f1, f10 + (set! acc (* f1 f10)) ;; x * c_1 + ;; lui v1, 16256 + ;; mul.s f3, f2, f1 + (set! f3 (* f2 f1)) ;; x^3 + ;; or v1, v1, r0 + ;; mul.s f4, f2, f2 + (set! f4 (* f2 f2)) ;; x^4 + ;; lui a2, -16641 + ;; mtc1 f15, a1 + (set! f15 (the-as float #x361AA27F)) ;; 1/9! + ;; lui a1, -16641 ;; I think this is a typo... + ;; or a1, a2, a1 ;; this should set the lower 16 bits. + ;; mtc1 f16, v1 + (set! f16 (the-as float #x3f800000)) ;; 1.0 + ;; sll r0, r0, 0 + ;; mtc1 f17, a1 + + ;; it looks like they set the lower 16-bits of the x^2 + ;; coefficient for cosine incorrectly + (#cond + (FIX_COSINE_BUG + ;; the constant used in *cos-poly-vec* + (set! 
f17 (the-as float #xbefffd62)) + ) + (#t + ;; missing the lower 16 bits. + (set! f17 (the-as float #xBEFF0000)) + ) + ) + + ;; sll r0, r0, 0 + ;; mul.s f5, f3, f2 + (set! f5 (* f3 f2)) + ;; sll r0, r0, 0 + ;; mul.s f6, f3, f3 + (set! f6 (* f3 f3)) + ;; sll r0, r0, 0 + ;; mul.s f7, f4, f3 + (set! f7 (* f4 f3)) + ;; sll r0, r0, 0 + ;; mul.s f8, f4, f4 + (set! f8 (* f4 f4)) + ;; sll r0, r0, 0 + ;; mul.s f9, f5, f4 + (set! f9 (* f5 f4)) + ;; lui v1, 15658 + ;; madda.s f3, f11 + (set! acc (+ acc (* f3 f11))) ;; add x^3 sine term + ;; ori v1, v1, 31272 + ;; madda.s f5, f12 + (set! acc (+ acc (* f5 f12))) ;; add x^5 sine term + ;; lui a1, -17742 + ;; madda.s f7, f14 + (set! acc (+ acc (* f7 f14))) ;; add x^7 sine term + ;; ori a1, a1, 48177 + ;; madd.s f21, f9, f15 + (set! f21 (+ acc (* f9 f15))) ;; add x^9 sine term + ;; lui a2, 14249 + ;; mtc1 f18, v1 + (set! f18 (the-as float #x3D2A7A28)) ;; cos coeff + ;; ori v1, a2, 13291 + ;; mtc1 f19, a1 + (set! f19 (the-as float #xBAB2BC31)) ;; cos coeff + ;; sll r0, r0, 0 + ;; mtc1 f20, v1 + (set! f20 (the-as float #x37A933EB)) + ;; sll r0, r0, 0 + ;; mula.s f16, f16 + (set! acc (* f16 f16)) ;; acc = 1, constant cos term. + ;; sll r0, r0, 0 + ;; madda.s f2, f17 + (set! acc (+ acc (* f2 f17))) + ;; sll r0, r0, 0 + ;; madda.s f4, f18 + (set! acc (+ acc (* f4 f18))) + ;; sll r0, r0, 0 + ;; madda.s f6, f19 + (set! acc (+ acc (* f6 f19))) + ;; sll r0, r0, 0 + ;; madd.s f22, f8, f20 + (set! f22 (+ acc (* f8 f20))) + ;; sll r0, r0, 0 + ;; swc1 f21, 0(a0) + (set! (-> ,out 0) f21) + ;; sll r0, r0, 0 + ;; swc1 f22, 4(a0) + (set! (-> ,out 1) f22) + ;; or v0, r0, r0 + 0 + ) + ) + +(defun sincos-rad! ((out (pointer float)) (x float)) + "Compute the sine and cosine of x, store it in the output array. + Has the cosine bug." + (sincos-rad-asm out x) + ) + +(defun sincos! ((out (pointer float)) (x float)) + "Compute the sine and cosine of x, store it in the output array. + The input is in rotation units, and is unwrapped properly. 
+ Also has the cosine bug" + (sincos-rad-asm out (* ROT_TO_RAD (the float (sar (shl (the int x) 48) 48)))) + ) + +(defun vector-rad<-vector-deg! ((out vector) (in vector)) + "Convert a vector in rotation units to radians, and unwrap. + Input can be anything, output will be -pi to pi." + (rlet ((rot-to-rad :class vf) + (vf1 :class vf)) + (.mov rot-to-rad (the-as float ROT_TO_RAD)) + (.lvf vf1 in) + (.ftoi.vf vf1 vf1) ;; to int + (.pw.sll vf1 vf1 16) ;; shifts + (.pw.sra vf1 vf1 16) + (.itof.vf vf1 vf1) ;; to float + (.mul.x.vf vf1 vf1 rot-to-rad) + (.svf out vf1) + ) + ) + +(defun vector-rad<-vector-deg/2! ((out vector) (in vector)) + "Divide the input by two, and then convert from rotation units to radians, unwrapping. + Not sure why this really needs to be separate from the previous function..." + (rlet ((temp :class vf) + (vf1 :class vf)) + ;;(set! v1-0 952700890) + ;;(set! a2-0 1056964608) + ;;(.lqc2 vf1 0 a1-0) + (.lvf vf1 in) + + ;; multiply by 0.5. + ;;(.qmtc2.i vf2 a2-0) + (.mov temp (the-as float #x3f000000)) ;; 0.5 + ;;(.vmulx.xyzw vf1 vf1 vf2) + (.mul.x.vf vf1 vf1 temp) + + ;;(.vftoi0.xyzw vf1 vf1) + (.ftoi.vf vf1 vf1) + ;;(.qmtc2.i vf2 v1-0) + ;;(.qmfc2.i v1-1 vf1) + ;;(.psllw v1-2 v1-1 16) + (.pw.sll vf1 vf1 16) + ;;(.psraw v1-3 v1-2 16) + (.pw.sra vf1 vf1 16) + ;;(.qmtc2.i vf1 v1-3) + ;;(.vitof0.xyzw vf1 vf1) + (.itof.vf vf1 vf1) + ;;(.vmulx.xyzw vf1 vf1 vf2) + (.mov temp (the-as float ROT_TO_RAD)) + (.mul.x.vf vf1 vf1 temp) + ;;(.sqc2 vf1 0 arg0) + ;;(.qmfc2.i v0-0 vf1) + (.svf out vf1) + 0 + ) + ) + + +(defun vector-sincos! ((out-sin vector) (out-cos vector) (in vector)) + "Compute sine and cosine of each element in a vector, in rotation units" + (let ((temp (new 'stack-no-clear 'vector))) + (vector-rad<-vector-deg! temp in) + (vector-sincos-rad! out-sin out-cos temp) + ) + ) + +(defun-extern cos float float) + +(defun tan-rad ((arg0 float)) + "This function appears to be named wrong and actually operates on rotation units."
+ (/ (sin arg0) (cos arg0)) + ) + +(defun cos ((arg0 float)) + "Cosine of rotation units" + (sin (+ 16384.000000 arg0)) + ) + +(defun tan ((arg0 float)) + "Correctly named tangent of rotation units" + (/ (sin arg0) (cos arg0)) + ) + +(defun atan0 ((arg0 float) (arg1 float)) + "inverse tangent, to rotation units. y,x order. Does not handle signs correctly. + Do not use this function directly, instead use atan2" + (rlet ((f20 :class fpr :type float) + (f21 :class fpr :type float) + (f1 :class fpr :type float) + (f2 :class fpr :type float) + (f3 :class fpr :type float) + (f4 :class fpr :type float) + (f5 :class fpr :type float) + (f6 :class fpr :type float) + (f7 :class fpr :type float) + (f8 :class fpr :type float) + (f9 :class fpr :type float) + (f10 :class fpr :type float) + (f19 :class fpr :type float) + (f11 :class fpr :type float) + (f12 :class fpr :type float) + (f13 :class fpr :type float) + (f14 :class fpr :type float) + (f15 :class fpr :type float) + (f16 :class fpr :type float) + (f17 :class fpr :type float) + (f18 :class fpr :type float) + (acc :class fpr :type float) + ) + + ;;mtc1 f20, a1 + (set! f20 arg1) + ;;mtc1 f21, a0 + (set! f21 arg0) + ;;sub.s f1, f21, f20 + (set! f1 (- f21 f20)) + ;;add.s f2, f21, f20 + (set! f2 (+ f21 f20)) + ;;div.s f1, f1, f2 + (set! f1 (/ f1 f2)) + ;;lwc1 f19, L132(fp) + (set! f19 (the-as float #x46000000)) + ;;lwc1 f11, L140(fp) + (set! f11 (the-as float #x4622f97c)) + ;;lwc1 f12, L151(fp) + (set! f12 (the-as float #xc55946e1)) + ;;lwc1 f13, L120(fp) + (set! f13 (the-as float #x450207fd)) + ;;lwc1 f14, L126(fp) + (set! f14 (the-as float #xc4b556ce)) + ;;lwc1 f15, L113(fp) + (set! f15 (the-as float #x447b6ca4)) + ;;mul.s f2, f1, f1 + (set! f2 (* f1 f1)) + ;;lwc1 f16, L142(fp) + (set! f16 (the-as float #xc411ca52)) + ;;lwc1 f17, L118(fp) + (set! f17 (the-as float #x43640558)) + ;;mul.s f3, f1, f2 + (set! f3 (* f1 f2)) + ;;mul.s f1, f1, f11 + (set! f1 (* f1 f11)) + ;;mul.s f4, f2, f2 + (set! 
f4 (* f2 f2)) + ;;lwc1 f18, L141(fp) + (set! f18 (the-as float #xc2292434)) + ;;mul.s f5, f3, f2 + (set! f5 (* f3 f2)) + ;;mul.s f6, f4, f3 + (set! f6 (* f4 f3)) + ;;mul.s f7, f5, f4 + (set! f7 (* f5 f4)) + ;;mul.s f8, f6, f4 + (set! f8 (* f6 f4)) + ;;mul.s f9, f7, f4 + (set! f9 (* f7 f4)) + ;;mul.s f10, f8, f4 + (set! f10 (* f8 f4)) + ;;adda.s f1, f19 + (set! acc (+ f1 f19)) + ;;madda.s f3, f12 + (set! acc (+ acc (* f3 f12))) + + ;;madda.s f5, f13 + (set! acc (+ acc (* f5 f13))) + + ;;madda.s f6, f14 + (set! acc (+ acc (* f6 f14))) + + ;;madda.s f7, f15 + (set! acc (+ acc (* f7 f15))) + + ;;madda.s f8, f16 + (set! acc (+ acc (* f8 f16))) + + ;;madda.s f9, f17 + (set! acc (+ acc (* f9 f17))) + + ;;madd.s f19, f10, f18 + ;;mfc1 v0, f19 + (+ acc (* f10 f18)) + ) + ) + + +(defmacro .adda.s (a b) + `(set! acc (+ ,a ,b)) + ) + +(defmacro .madda.s (a b) + `(set! acc (+ acc (* ,a ,b))) + ) + +(defmacro .madd.s (a b c) + `(set! ,a (+ acc (* ,b ,c))) + ) + +(defun atan-series-rad ((arg0 float)) + "A helper function for atan" + (local-vars + (f0-1 float) + (f1-0 float) + (f2-0 float) + (f3-0 float) + (f4-0 float) + (f5-0 float) + (f6-0 float) + (f7-0 float) + (f8-0 float) + (f9-0 float) + (f10-0 float) + (f11-0 float) + (f12-0 float) + (f13-0 float) + (f14-0 float) + (f15-0 float) + (f16-0 float) + (f17-0 float) + (f18-0 float) + (acc float) + (f18-1 float) + ) + (set! f1-0 (* arg0 arg0)) + (set! f10-0 (the-as float #x3f7ffff5)) + (set! f11-0 (the-as float #xbeaaa61c)) + (set! f2-0 (* arg0 f1-0)) + (set! f3-0 (* f1-0 f1-0)) + (set! f12-0 (the-as float #x3e4c40a6)) + (set! f4-0 (* f2-0 f1-0)) + (set! f5-0 (* f3-0 f2-0)) + (set! f13-0 (the-as float #xbe0e6c63)) + (set! f6-0 (* f4-0 f3-0)) + (set! f7-0 (* f5-0 f3-0)) + (set! f14-0 (the-as float #x3dc577df)) + (set! f8-0 (* f6-0 f3-0)) + (set! f9-0 (* f7-0 f3-0)) + (set! f15-0 (the-as float #xbd6501c4)) + (set! f18-0 (the-as float #x3f490fdb)) + (set! f0-1 (* arg0 f10-0)) + (set! f16-0 (the-as float #x3cb31652)) + (set! 
f17-0 (the-as float #xbb84d7e7)) + (.adda.s f0-1 f18-0) + (.madda.s f2-0 f11-0) + (.madda.s f4-0 f12-0) + (.madda.s f5-0 f13-0) + (.madda.s f6-0 f14-0) + (.madda.s f7-0 f15-0) + (.madda.s f8-0 f16-0) + (.madd.s f18-1 f9-0 f17-0) + ;;(the-as float f18-1) + f18-1 + ) + + +(defun atan-rad ((arg0 float)) + "inverse tangent in radians" + (atan-series-rad (/ (+ -1.000000 arg0) (+ 1.000000 arg0))) + ) + +(defun sign ((arg0 float)) + "Returns -1.0, 0.0, or 1.0 depending on the sign of the argument" + (cond + ((< 0.000000 arg0) + 1.000000) + ((< arg0 0.000000) + -1.000000) + (else + 0.000000) + ) + ) + +(defun atan2-rad ((arg0 float) (arg1 float)) + (local-vars + (f0-6 float) + (f0-14 float) + (f0-22 float) + (f0-28 float) + (f30-1 float) + (f30-2 float) + ) + (if (= arg1 0.000000) + (* PI_OVER_2 (sign arg0)) + (cond + ((and (< arg0 0.000000) (< arg1 0.000000)) + ;; this was probably an inline call to atan-rad + (set! f30-1 MINUS_PI) + (set! f0-6 (/ arg0 arg1)) + (+ f30-1 (atan-series-rad (/ (+ -1.000000 f0-6) (+ 1.000000 f0-6)))) + ) + ((< arg0 0.000000) + (set! f0-14 (- (/ arg0 arg1))) + (- (atan-series-rad (/ (+ -1.000000 f0-14) (+ 1.000000 f0-14)))) + ) + ((< arg1 0.000000) + (set! f30-2 PI) + (set! f0-22 (- (/ arg0 arg1))) + (- f30-2 (atan-series-rad (/ (+ -1.000000 f0-22) (+ 1.000000 f0-22)))) + ) + (else + (set! f0-28 (/ arg0 arg1)) + (atan-series-rad (/ (+ -1.000000 f0-28) (+ 1.000000 f0-28))) + ) + ) + ) + ) + +;; ???? +(deftype float-type (uint32) + () + :flag-assert #x900000004 + ) + +;; magic numbers for exp. 
+(define exp-slead + (new 'static 'array float 32 + (the-as float #x3f800000) + (the-as float #x3f82cd80) + (the-as float #x3f85aac0) + (the-as float #x3f889800) + (the-as float #x3f8b95c0) + (the-as float #x3f8ea400) + (the-as float #x3f91c3c0) + (the-as float #x3f94f4c0) + (the-as float #x3f9837c0) + (the-as float #x3f9b8d00) + (the-as float #x3f9ef500) + (the-as float #x3fa27040) + (the-as float #x3fa5fec0) + (the-as float #x3fa9a140) + (the-as float #x3fad5800) + (the-as float #x3fb123c0) + (the-as float #x3fb504c0) + (the-as float #x3fb8fb80) + (the-as float #x3fbd0880) + (the-as float #x3fc12c40) + (the-as float #x3fc56700) + (the-as float #x3fc9b980) + (the-as float #x3fce2480) + (the-as float #x3fd2a800) + (the-as float #x3fd744c0) + (the-as float #x3fdbfb80) + (the-as float #x3fe0ccc0) + (the-as float #x3fe5b900) + (the-as float #x3feac0c0) + (the-as float #x3fefe480) + (the-as float #x3ff52540) + (the-as float #x3ffa8380) + ) + ) + +(define exp-strail + (new 'static 'array float 32 + (the-as float #x0) + (the-as float #x35531585) + (the-as float #x34d9f312) + (the-as float #x35e8092e) + (the-as float #x3471f546) + (the-as float #x36e62d17) + (the-as float #x361b9d59) + (the-as float #x36bea3fc) + (the-as float #x36c14637) + (the-as float #x36e6e755) + (the-as float #x36c98247) + (the-as float #x34c0c312) + (the-as float #x36354d8b) + (the-as float #x3655a754) + (the-as float #x36fba90b) + (the-as float #x36d6074b) + (the-as float #x36cccfe7) + (the-as float #x36bd1d8c) + (the-as float #x368e7d60) + (the-as float #x35cca667) + (the-as float #x36a84554) + (the-as float #x36f619b9) + (the-as float #x35c151f8) + (the-as float #x366c8f89) + (the-as float #x36f32b5a) + (the-as float #x36de5f6c) + (the-as float #x36776155) + (the-as float #x355cef90) + (the-as float #x355cfba5) + (the-as float #x36e66f73) + (the-as float #x36f45492) + (the-as float #x36cb6dc9) + ) + ) + + +(defun exp ((arg float)) + (local-vars + (f0 float) + (f1 float) + (f2 float) + (f3 float) 
+ (f4 float) + (f5 float) + (f6 float) + (f7 float) + (f8 float) + (f10 float) + (f11 float) + (f12 float) + (f13 float) + (f14 float) + (f15 float) + (f16 float) + (f17 float) + (f18 float) + (a2 int) + (v0 float) + (v1 int) + (a1 int) + (a3 int) + (t0 int) + (a0-2 int) + ) + + (set! f0 arg) + (set! f0 (fabs f0)) + (set! f1 (the-as float #x435c6bba)) + ;;(b! (>=.s f1 f0) L44 (nop!)) + (when-goto (>= f1 f0) L44) + + (set! f0 0.0) + (set! f1 arg) + ;;(b! (>=.s f0 f1) L42 (nop!)) + (when-goto (>= f0 f1) L42) + + (set! v0 (the-as float #x7f7fffff)) + ;;(b! #t L43 (nop!)) + (goto L43) + + (label L42) + (set! v0 (the-as float #x0)) + + (label L43) + ;;(b! #t L49 (nop!)) + (goto L49) + + + (label L44) + (set! f1 (the-as float #x33000000)) + ;;(b! (>=.s f0 f1) L45 (nop!)) + (when-goto (>= f0 f1) L45) + + (set! f0 (the-as float #x3f800000)) + (set! f1 arg) + (set! f0 (+ f0 f1)) + (set! v0 f0) + ;;(b! #t L49 (nop!)) + (goto L49) + + (label L45) + (set! f16 (the-as float #x4238aa3b)) + (set! f12 (the-as float #x3cb17200)) + (set! f13 (the-as float #x333fbe8e)) + (set! f14 (the-as float #x3f000044)) + (set! f15 (the-as float #x3e2aaaec)) + (set! f0 arg) + (set! f0 (* f0 f16)) + ;;(set! f0 (f2i f0)) + ;;(set! a2 (fpr->gpr f0)) + (set! a2 (the int f0)) + (set! v1 (logand a2 31)) + (set! a1 (- a2 v1)) + (set! a3 512) + (set! t0 a2) + ;;(bl! (<0.si t0) L46 (no-delay!)) + ;;(set! t0 (- t0)) + (set! t0 (abs t0)) + + ;;(label L46) + ;;(b! (>=.si a3 t0) L47 (nop!)) + (when-goto (>= a3 t0) L47) + ;;(set! f17 a1) + ;;(set! f18 v1) + ;;(set! f17 (i2f f17)) + ;;(set! f18 (i2f f18)) + (set! f17 (the float a1)) + (set! f18 (the float v1)) + (set! f17 (* f17 f12)) + (set! f18 (* f18 f12)) + (set! f0 arg) + (set! f17 (- f0 f17)) + ;;(b! #t L48 (set! f2 (-.s f17 f18))) + (set! f2 (- f17 f18)) + (goto L48) + + (label L47) + ;;(set! f17 (gpr->fpr a2)) + ;;(set! f17 (i2f f17)) + (set! f17 (the float a2)) + (set! f17 (* f17 f12)) + (set! f0 arg) + (set! f2 (- f0 f17)) + + (label L48) + (set! 
a0-2 (- a2)) + ;;(set! f17 (gpr->fpr a0)) + ;;(set! f17 (i2f f17)) + (set! f17 (the float a0-2)) + (set! f3 (* f17 f13)) + (set! a0-2 (sar a1 5)) + (set! f4 (+ f2 f3)) + (set! f6 (* f4 f15)) + (set! f6 (+ f14 f6)) + (set! f6 (* f4 f6)) + (set! f6 (* f4 f6)) + (set! f5 (+ f3 f6)) + (set! f5 (+ f2 f5)) + ;;(set! a1 exp-slead) + ;;(set! a2 (sll v1 2)) + ;;(set! a1 (+ a1 a2)) + ;;(set! f10 (l.f a1)) + (set! f10 (-> exp-slead v1)) + ;;(set! a1 exp-strail) + ;;(set! v1 (sll v1 2)) + ;;(set! v1 (+ a1 v1)) + ;;(set! f11 (l.f v1)) + (set! f11 (-> exp-strail v1)) + (set! f7 (+ f10 f11)) + (set! f8 (* f7 f5)) + (set! f8 (+ f11 f8)) + (set! f8 (+ f8 f10)) + (set! v1 (the-as int f8)) + (set! a0-2 (logand a0-2 511)) + (set! a0-2 (shl a0-2 23)) + (set! v0 (the-as float (+ v1 a0-2))) + (label L49) + v0 + ) + +(defun atan ((arg0 float) (arg1 float)) + "atan2, for rotation units" + (if + (and (= arg1 0.0) (= arg0 0.0)) + 0.000000 + (cond + ((and (< arg1 0.0) (< arg0 0.0)) + (+ -32768.0 (atan0 (- arg0) (- arg1))) + ) + ((< arg0 0.) + (- (atan0 (- arg0) arg1)) + ) + ((< arg1 0.) + (- 32768.0 (atan0 arg0 (- arg1))) + ) + (else + (atan0 arg0 arg1) + ) + ) + ) + ) + +(defun asin ((arg0 float)) + "Inverse sine. Returns rotation units" + (local-vars + (v1-1 symbol) + (v1-2 float) + (v1-4 float) + (gp-0 symbol) + (f0-0 float) + (f0-5 float) + (f0-6 float) + (f0-8 float) + (f1-2 float) + ) + (set! gp-0 '#f) + (set! f0-0 0.000000) + (when (< arg0 0.000000) (set! arg0 (- arg0)) (set! gp-0 '#t) (set! v1-1 gp-0)) + (cond + ((< 1.000000 arg0) (set! f0-5 (the-as float #x467ffffc)) (set! v1-2 f0-5)) + (else + (set! f0-6 1.000000) + (set! f1-2 arg0) + (set! f0-8 (sqrtf (- f0-6 (* f1-2 f1-2)))) + (set! f0-5 (atan0 arg0 f0-8)) + (set! v1-4 f0-5) + ) + ) + (if gp-0 (- f0-5) f0-5) + ) + +(defun acos ((arg0 float)) + "Inverse cosine. Returns rotation units" + (- 16384.000000 (asin arg0)) + ) + +(defun acos-rad ((arg0 float)) + "Inverse cosine, returning radians." 
+ (local-vars + (a1-0 none) + (f0-1 float) + (f0-3 float) + (f0-5 float) + (f0-6 float) + (f0-8 float) + (f0-10 float) + (f1-1 float) + (f1-6 float) + ) + (cond + ((>= arg0 0.000000) + (set! f0-1 1.000000) + (set! f1-1 arg0) + (set! f0-3 (sqrtf (- f0-1 (* f1-1 f1-1)))) + (set! f0-5 (/ (- f0-3 arg0) (+ f0-3 arg0))) + (atan-series-rad f0-5) + ) + (else + (set! f0-6 1.000000) + (set! f1-6 arg0) + (set! f0-8 (sqrtf (- f0-6 (* f1-6 f1-6)))) + (set! f0-10 (/ (+ f0-8 arg0) (- f0-8 arg0))) + (- (the-as float #x40490fda) (atan-series-rad f0-10)) + ) + ) + ) + +(defun sinerp ((minimum float) (maximum float) (amount float)) + "map amount to min,max using sine. Kinda weird, usually people use cosine." + (lerp minimum maximum (sin (* 16384.000000 amount))) + ) + +(defun sinerp-clamp ((minimum float) (maximum float) (amount float)) + "Like sinerp, but clamp to min,max" + (cond + ((>= 0.000000 amount) minimum) + ((>= amount 1.000000) maximum) + (else (sinerp minimum maximum amount)) + ) + ) + +(defun coserp ((minimum float) (maximum float) (amount float)) + "Weird lerp with cosine (over 90 degrees?)" + (lerp minimum maximum (- 1.000000 (cos (* 16384.000000 amount)))) + ) + +(defun coserp-clamp ((minimum float) (maximum float) (amount float)) + "Weird 90 degree lerp with cosine, clamped to min,max" + (cond + ((>= 0.000000 amount) minimum) + ((>= amount 1.000000) maximum) + (else (coserp minimum maximum amount)) + ) + ) + +(defun coserp180 ((minimum float) (maximum float) (amount float)) + "Classic lerp with cosine" + (lerp minimum maximum + (* 0.5 (- 1.000000 (cos (* 32768.000000 amount)))) + ) + ) + +(defun coserp180-clamp ((minimum float) (maximum float) (amount float)) + "Classic coserp with saturation" + (cond + ((>= 0.000000 amount) minimum) + ((>= amount 1.000000) maximum) + (else (coserp180 minimum maximum amount)) + ) + ) + +(defun ease-in-out ((total int) (progress int)) + "Weird coserp like mapping from 0 to 1 as progress goes from 0 to total" + (local-vars (v1-0 int) 
(a0-1 int)) + (cond + ((>= progress total) + ;; past the end + 1.000000 + ) + ((<= progress 0) + ;; negative progress + 0.000000 + ) + ((begin (set! v1-0 (sar total 1)) (< v1-0 progress)) + ;; more than half way there! + (set! a0-1 (- progress total)) + (+ 0.500000 + (* 0.500000 (sin + (- 16384.000000 (/ (* 16384.000000 (the float a0-1)) (the float v1-0))) + ) + ) + ) + ) + (else + ;; less than half way there. + (- 0.500000 + (* 0.500000 + (cos (/ (* 16384.000000 (the float progress)) (the float v1-0))) + ) + ) + ) + ) + ) diff --git a/goal_src/engine/math/vector-h.gc b/goal_src/engine/math/vector-h.gc index 7ba8a9e714..5da43425bf 100644 --- a/goal_src/engine/math/vector-h.gc +++ b/goal_src/engine/math/vector-h.gc @@ -423,9 +423,9 @@ :size-assert #x28 :flag-assert #xb00000028 (:methods - (dummy-9 () none 9) - (dummy-10 () none 10) - ) + (dummy-9 () none 9) + (dummy-10 () none 10) + ) ) (deftype cylinder-flat (structure) @@ -438,9 +438,9 @@ :size-assert #x28 :flag-assert #xb00000028 (:methods - (dummy-9 () none 9) - (dummy-10 () none 10) - ) + (dummy-9 () none 9) + (dummy-10 () none 10) + ) ) ;; vector-h @@ -489,52 +489,122 @@ (defmacro set-vector! (v xv yv zv wv) `(begin - (set! (-> ,v x) ,xv) - (set! (-> ,v y) ,yv) - (set! (-> ,v z) ,zv) - (set! (-> ,v w) ,wv)) + (set! (-> ,v x) ,xv) + (set! (-> ,v y) ,yv) + (set! (-> ,v z) ,zv) + (set! (-> ,v w) ,wv)) ) (defun vector-dot ((a vector) (b vector)) "Take the dot product of two vectors. Only does the x, y, z compoments. Originally handwritten assembly to space out loads and use FPU accumulator" - (declare (inline)) - (let ((result 0.)) - (+! result (* (-> a x) (-> b x))) - (+! result (* (-> a y) (-> b y))) - (+! result (* (-> a z) (-> b z))) - result - ) - ) + (declare (inline)) + (let ((result 0.)) + (+! result (* (-> a x) (-> b x))) + (+! result (* (-> a y) (-> b y))) + (+! result (* (-> a z) (-> b z))) + result + ) + ) (defun vector-dot-vu ((a vector) (b vector)) "Take the dot product of two vectors. 
Only does the x, y, z components. Originally implemented using VU macro ops" - (declare (inline)) - (vector-dot a b) + (declare (inline)) + (rlet ((vf1 :class vf) + (vf2 :class vf) + (result :class fpr :type float)) + ;; (.lqc2 vf1 0 arg0) + (.lvf vf1 a) + ;; (.lqc2 vf2 0 arg1) + (.lvf vf2 b) + ;; (.vmul.xyzw vf1 vf1 vf2) + (.mul.vf vf1 vf1 vf2) + ;; (.vaddy.x vf1 vf1 vf1) + (.add.y.vf vf1 vf1 vf1 :mask #b1) + ;; (.vaddz.x vf1 vf1 vf1) + (.add.z.vf vf1 vf1 vf1 :mask #b1) + ;; (.qmfc2.i v0-0 vf1) + (.mov result vf1) + result + ) ) (defun vector4-dot ((a vector) (b vector)) "Take the dot product of two vectors. Does the x, y, z, and w compoments" - (declare (inline)) - (let ((result 0.)) - (+! result (* (-> a x) (-> b x))) - (+! result (* (-> a y) (-> b y))) - (+! result (* (-> a z) (-> b z))) - (+! result (* (-> a w) (-> b w))) - result + (declare (inline)) + (let ((result 0.)) + (+! result (* (-> a x) (-> b x))) + (+! result (* (-> a y) (-> b y))) + (+! result (* (-> a z) (-> b z))) + (+! result (* (-> a w) (-> b w))) + result + ) + ) + +(defmacro print-vf (vf &key (name #f)) + `(let ((temp (new 'stack 'vector))) + (.svf temp ,vf) + ,(if name + `(format #t "~A: ~`vector`P~%" (quote ,name) temp) + `(format #t "~`vector`P~%" temp) + ) ) - ) + ) + +(defmacro print-vf-hex (vf) + `(let ((temp (new 'stack 'vector4w))) + (.svf temp ,vf) + (format #t "~`vector4w`P~%" temp) + ) + ) (defun vector4-dot-vu ((a vector) (b vector)) "Take the dot product of two vectors. 
Does the x, y, z, and w compoments Originally implemented using VU macro ops" - (declare (inline)) - (vector4-dot a b) + (declare (inline)) + (rlet ((vf1 :class vf) + (vf2 :class vf) + (vf3 :class vf) + (acc :class vf) + (vf0 :class vf) + (result :class fpr :type float)) + (.lvf vf0 (new 'static 'vector :x 0.0 :y 0.0 :z 0.0 :w 1.0)) + ;; (.lqc2 vf1 0 arg0) + (.lvf vf1 a) + ;; (.lqc2 vf2 0 arg1) + (.lvf vf2 b) + + ;; (.vmul.xyzw vf1 vf1 vf2) + ;; set vf1 to element-wise products + (.mul.vf vf1 vf1 vf2) + + ;; (.vaddw.x vf3 vf0 vf0) + ;; set vf3x to 1 + (.xor.vf vf3 vf3 vf3) + (.add.w.vf vf3 vf0 vf0 :mask #b1) + + ;; (.vmulax.x acc vf3 vf1) + ;; acc.x is now (xa * xb) + (.mul.x.vf acc vf3 vf1 :mask #b1) + + ;; (.vmadday.x acc vf3 vf1) + ;; acc += thing + (.add.mul.y.vf acc vf3 vf1 acc :mask #b1) + + ;; (.vmaddaz.x acc vf3 vf1) + (.add.mul.z.vf acc vf3 vf1 acc :mask #b1) + + ;; (.vmaddw.x vf1 vf3 vf1) + (.add.mul.w.vf vf1 vf3 vf1 acc :mask #b1) + ;; (.qmfc2.i v0-0 vf1) + (.mov result vf1) + result + ) ) (defun vector+! 
((dst vector) (a vector) (b vector)) @@ -544,18 +614,18 @@ (vf1 :class vf :reset-here #t) (vf2 :class vf :reset-here #t) (vf3 :class vf :reset-here #t)) - ; load vectors - (.lvf vf2 a) - (.lvf vf3 b) - ; set vf0 to zero - (.xor.vf vf0 vf0 vf0) - ; add - (.add.vf vf1 vf2 vf3) - ; set w = 0 - (.blend.vf vf1 vf1 vf0 :mask #b1000) - ; store - (.svf dst vf1) - ) + ;; load vectors + (.lvf vf2 a) + (.lvf vf3 b) + ;; set vf0 to zero + (.xor.vf vf0 vf0 vf0) + ;; add + (.add.vf vf1 vf2 vf3) + ;; set w = 0 + (.blend.vf vf1 vf1 vf0 :mask #b1000) + ;; store + (.svf dst vf1) + ) dst ) @@ -566,18 +636,18 @@ (vf1 :class vf :reset-here #t) (vf2 :class vf :reset-here #t) (vf3 :class vf :reset-here #t)) - ; load vectors - (.lvf vf2 a) - (.lvf vf3 b) - ; set vf0 to zero - (.xor.vf vf0 vf0 vf0) - ; subtract - (.sub.vf vf1 vf2 vf3) - ; set w = 0 - (.blend.vf vf1 vf1 vf0 :mask #b1000) - ; store - (.svf dst vf1) - ) + ;; load vectors + (.lvf vf2 a) + (.lvf vf3 b) + ;; set vf0 to zero + (.xor.vf vf0 vf0 vf0) + ;; subtract + (.sub.vf vf1 vf2 vf3) + ;; set w = 0 + (.blend.vf vf1 vf1 vf0 :mask #b1000) + ;; store + (.svf dst vf1) + ) dst ) @@ -585,11 +655,11 @@ "Set xyzw to 0." (declare (inline)) (rlet ((vf1 :class vf :reset-here #t)) - ; set vf1 = 0 - (.xor.vf vf1 vf1 vf1) - ; store the 0 - (.svf dest vf1) - ) + ;; set vf1 = 0 + (.xor.vf vf1 vf1 vf1) + ;; store the 0 + (.svf dest vf1) + ) dest ) @@ -606,9 +676,9 @@ The vectors must be aligned." (declare (inline)) (rlet ((vf1 :class vf :reset-here #t)) - (.lvf vf1 src) - (.svf dst vf1) - ) + (.lvf vf1 src) + (.svf dst vf1) + ) dst ) diff --git a/goal_src/goal-lib.gc b/goal_src/goal-lib.gc index 80a7622785..5b5897e232 100644 --- a/goal_src/goal-lib.gc +++ b/goal_src/goal-lib.gc @@ -191,6 +191,10 @@ ) ) +(defmacro defun-extern (function-name &rest type-info) + `(define-extern ,function-name (function ,@type-info)) + ) + ;; Define a new function, but only if we're debugging. ;; TODO - should place the function in the debug segment! 
(defmacro defun-debug (name bindings &rest body) diff --git a/goalc/compiler/Compiler.h b/goalc/compiler/Compiler.h index d225757781..18edc3dcfc 100644 --- a/goalc/compiler/Compiler.h +++ b/goalc/compiler/Compiler.h @@ -72,6 +72,16 @@ class Compiler { emitter::Register::VF_ELEMENT broadcastElement, Env* env); + Val* compile_asm_vf_math2(const goos::Object& form, + const goos::Object& rest, + IR_VFMath2Asm::Kind kind, + Env* env); + + Val* compile_asm_vf_math2_imm_u8(const goos::Object& form, + const goos::Object& rest, + IR_VFMath2Asm::Kind kind, + Env* env); + Val* compile_asm_vf_math4_two_operation(const goos::Object& form, const goos::Object& rest, IR_VFMath3Asm::Kind first_op_kind, @@ -150,6 +160,11 @@ class Compiler { Env* env, bool call_constructor); + StaticResult fill_static_array(const goos::Object& form, + const goos::Object& rest, + bool boxed, + Env* env); + TypeSystem m_ts; std::unique_ptr m_global_env = nullptr; std::unique_ptr m_none = nullptr; @@ -360,6 +375,11 @@ class Compiler { Val* compile_asm_div_vf(const goos::Object& form, const goos::Object& rest, Env* env); Val* compile_asm_sqrt_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_itof_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_ftoi_vf(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pw_sll(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pw_srl(const goos::Object& form, const goos::Object& rest, Env* env); + Val* compile_asm_pw_sra(const goos::Object& form, const goos::Object& rest, Env* env); // Atoms diff --git a/goalc/compiler/IR.cpp b/goalc/compiler/IR.cpp index bcb2b66989..0717b03420 100644 --- a/goalc/compiler/IR.cpp +++ b/goalc/compiler/IR.cpp @@ -110,6 +110,14 @@ void regset_common(emitter::ObjectGenerator* gen, } else if (src_class == RegClass::GPR_64 && dst_class == RegClass::FLOAT) { // gpr -> xmm 1x 
gen->add_instr(IGen::movd_xmm32_gpr32(dst_reg, src_reg), irec); + } else if (src_class == RegClass::VECTOR_FLOAT && dst_class == RegClass::FLOAT) { + gen->add_instr(IGen::mov_xmm32_xmm32(dst_reg, src_reg), irec); + } else if (src_class == RegClass::FLOAT && dst_class == RegClass::VECTOR_FLOAT) { + gen->add_instr(IGen::mov_xmm32_xmm32(dst_reg, src_reg), irec); + } else if (src_class == RegClass::GPR_64 && dst_class == RegClass::VECTOR_FLOAT) { + gen->add_instr(IGen::movd_xmm32_gpr32(dst_reg, src_reg), irec); + } else if (src_class == RegClass::VECTOR_FLOAT && dst_class == RegClass::GPR_64) { + gen->add_instr(IGen::movd_gpr32_xmm32(dst_reg, src_reg), irec); } else { assert(false); // unhandled move. } @@ -1426,6 +1434,99 @@ void IR_VFMath3Asm::do_codegen(emitter::ObjectGenerator* gen, } } +/////////////////////// +// AsmVF2 +/////////////////////// + +IR_VFMath2Asm::IR_VFMath2Asm(bool use_color, + const RegVal* dst, + const RegVal* src, + Kind kind, + std::optional imm) + : IR_Asm(use_color), m_dst(dst), m_src(src), m_kind(kind), m_imm(std::move(imm)) {} + +std::string IR_VFMath2Asm::print() { + std::string function; + bool use_imm = false; + switch (m_kind) { + case Kind::ITOF: + function = ".itof.vf"; + break; + case Kind::FTOI: + function = ".ftoi.vf"; + break; + case Kind::PW_SLL: + use_imm = true; + function = ".pw.sll"; + break; + case Kind::PW_SRL: + use_imm = true; + function = ".pw.srl"; + break; + case Kind::PW_SRA: + use_imm = true; + function = ".pw.sra"; + break; + default: + assert(false); + } + + if (use_imm) { + assert(m_imm.has_value()); + return fmt::format("{}{} {}, {}, {}", function, get_color_suffix_string(), m_dst->print(), + m_src->print(), *m_imm); + } else { + return fmt::format("{}{} {}, {}", function, get_color_suffix_string(), m_dst->print(), + m_src->print()); + } +} + +RegAllocInstr IR_VFMath2Asm::to_rai() { + RegAllocInstr rai; + if (m_use_coloring) { + rai.write.push_back(m_dst->ireg()); + rai.read.push_back(m_src->ireg()); + } + 
return rai; +} + +void IR_VFMath2Asm::do_codegen(emitter::ObjectGenerator* gen, + const AllocationResult& allocs, + emitter::IR_Record irec) { + auto dst = get_reg_asm(m_dst, allocs, irec, m_use_coloring); + auto src = get_reg_asm(m_src, allocs, irec, m_use_coloring); + + switch (m_kind) { + case Kind::ITOF: + gen->add_instr(IGen::itof_vf(dst, src), irec); + break; + case Kind::FTOI: + gen->add_instr(IGen::ftoi_vf(dst, src), irec); + break; + case Kind::PW_SLL: + // you are technically allowed to put values > 32 in here. + assert(m_imm.has_value()); + assert(*m_imm >= 0); + assert(*m_imm <= 255); + gen->add_instr(IGen::pw_sll(dst, src, *m_imm), irec); + break; + case Kind::PW_SRL: + assert(m_imm.has_value()); + assert(*m_imm >= 0); + assert(*m_imm <= 255); + gen->add_instr(IGen::pw_srl(dst, src, *m_imm), irec); + break; + case Kind::PW_SRA: + assert(m_imm.has_value()); + assert(*m_imm >= 0); + assert(*m_imm <= 255); + gen->add_instr(IGen::pw_sra(dst, src, *m_imm), irec); + break; + default: + assert(false); + } +} + // ---- Blend VF IR_BlendVF::IR_BlendVF(bool use_color, diff --git a/goalc/compiler/IR.h b/goalc/compiler/IR.h index eac6b02b80..ba35e10083 100644 --- a/goalc/compiler/IR.h +++ b/goalc/compiler/IR.h @@ -546,6 +546,27 @@ class IR_VFMath3Asm : public IR_Asm { Kind m_kind; }; +class IR_VFMath2Asm : public IR_Asm { + public: + enum class Kind { ITOF, FTOI, PW_SLL, PW_SRL, PW_SRA }; + IR_VFMath2Asm(bool use_color, + const RegVal* dst, + const RegVal* src, + Kind kind, + std::optional = std::nullopt); + std::string print() override; + RegAllocInstr to_rai() override; + void do_codegen(emitter::ObjectGenerator* gen, + const AllocationResult& allocs, + emitter::IR_Record irec) override; + + protected: + const RegVal* m_dst = nullptr; + const RegVal* m_src = nullptr; + Kind m_kind; + std::optional m_imm; +}; + class IR_BlendVF : public IR_Asm { public: IR_BlendVF(bool use_color, const RegVal* dst, const RegVal* src1, const RegVal* src2, u8 mask); diff --git 
a/goalc/compiler/Util.cpp b/goalc/compiler/Util.cpp index 5c4ab22813..a2a6a3f0e7 100644 --- a/goalc/compiler/Util.cpp +++ b/goalc/compiler/Util.cpp @@ -267,6 +267,8 @@ std::vector Compiler::get_list_as_vector(const goos::Object& o, if (max_length >= 0 && n >= max_length) { if (rest_out) { *rest_out = *cur; + } else { + throw std::runtime_error("get_list_as_vector would discard arguments"); } return result; } diff --git a/goalc/compiler/compilation/Asm.cpp b/goalc/compiler/compilation/Asm.cpp index e219b6806a..05a57f5831 100644 --- a/goalc/compiler/compilation/Asm.cpp +++ b/goalc/compiler/compilation/Asm.cpp @@ -282,7 +282,6 @@ Val* Compiler::compile_asm_lvf(const goos::Object& form, const goos::Object& res info.reg = RegClass::VECTOR_FLOAT; if (as_co) { // can do a clever offset here - assert(false); env->emit_ir(dest, as_co->offset, as_co->base->to_gpr(env), info, color); } else if (as_sv) { if (!color) { @@ -431,6 +430,116 @@ Val* Compiler::compile_asm_vf_math3(const goos::Object& form, return get_none(); } +Val* Compiler::compile_asm_vf_math2(const goos::Object& form, + const goos::Object& rest, + IR_VFMath2Asm::Kind kind, + Env* env) { + auto args = get_va(form, rest); + va_check( + form, args, {{}, {}}, + {{"color", {false, goos::ObjectType::SYMBOL}}, {"mask", {false, goos::ObjectType::INTEGER}}}); + bool color = true; + if (args.has_named("color")) { + color = get_true_or_false(form, args.named.at("color")); + } + + auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env); + auto src = compile_error_guard(args.unnamed.at(1), env)->to_reg(env); + check_vector_float_regs(form, env, {{"destination", dest}, {"source", src}}); + + u8 mask = 0b1111; + if (args.has_named("mask")) { + mask = args.named.at("mask").as_int(); + if (mask > 15) { + throw_compiler_error(form, "The value {} is out of range for a blend mask (0-15 inclusive).", + mask); + } + } + + // If the entire destination is to be copied, we can optimize out the blend + if (mask == 0b1111) { 
+ env->emit_ir(color, dest, src, kind); + } else { + auto temp_reg = env->make_vfr(dest->type()); + // Perform the operation on the source vector into a temporary register + env->emit_ir(color, temp_reg, src, kind); + // Blend the result back into the destination register using the mask + env->emit_ir(color, dest, dest, temp_reg, mask); + } + + return get_none(); +} + +Val* Compiler::compile_asm_vf_math2_imm_u8(const goos::Object& form, + const goos::Object& rest, + IR_VFMath2Asm::Kind kind, + Env* env) { + auto args = get_va(form, rest); + va_check( + form, args, {{}, {}, {}}, + {{"color", {false, goos::ObjectType::SYMBOL}}, {"mask", {false, goos::ObjectType::INTEGER}}}); + bool color = true; + if (args.has_named("color")) { + color = get_true_or_false(form, args.named.at("color")); + } + + auto dest = compile_error_guard(args.unnamed.at(0), env)->to_reg(env); + auto src = compile_error_guard(args.unnamed.at(1), env)->to_reg(env); + check_vector_float_regs(form, env, {{"destination", dest}, {"source", src}}); + s64 imm; + if (!try_getting_constant_integer(args.unnamed.at(2), &imm, env)) { + throw_compiler_error(form, "Could not evaluate {} as a compile-time integer.", + args.unnamed.at(2).print()); + } + + if (imm < 0 || imm > 255) { + throw_compiler_error(form, "Immediate {} is invalid. 
The value {} is out of range for a uint8.", + args.unnamed.at(2).print(), imm); + } + + u8 mask = 0b1111; + if (args.has_named("mask")) { + mask = args.named.at("mask").as_int(); + if (mask > 15) { + throw_compiler_error(form, "The value {} is out of range for a blend mask (0-15 inclusive).", + mask); + } + } + + // If the entire destination is to be copied, we can optimize out the blend + if (mask == 0b1111) { + env->emit_ir(color, dest, src, kind, imm); + } else { + auto temp_reg = env->make_vfr(dest->type()); + // Perform the operation on the source vector (with the immediate) into a temporary register + env->emit_ir(color, temp_reg, src, kind, imm); + // Blend the result back into the destination register using the mask + env->emit_ir(color, dest, dest, temp_reg, mask); + } + + return get_none(); +} + +Val* Compiler::compile_asm_pw_sll(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math2_imm_u8(form, rest, IR_VFMath2Asm::Kind::PW_SLL, env); +} + +Val* Compiler::compile_asm_pw_srl(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math2_imm_u8(form, rest, IR_VFMath2Asm::Kind::PW_SRL, env); +} + +Val* Compiler::compile_asm_pw_sra(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math2_imm_u8(form, rest, IR_VFMath2Asm::Kind::PW_SRA, env); +} + +Val* Compiler::compile_asm_itof_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math2(form, rest, IR_VFMath2Asm::Kind::ITOF, env); +} + +Val* Compiler::compile_asm_ftoi_vf(const goos::Object& form, const goos::Object& rest, Env* env) { + return compile_asm_vf_math2(form, rest, IR_VFMath2Asm::Kind::FTOI, env); +} + Val* Compiler::compile_asm_xor_vf(const goos::Object& form, const goos::Object& rest, Env* env) { return compile_asm_vf_math3(form, rest, IR_VFMath3Asm::Kind::XOR, emitter::Register::VF_ELEMENT::NONE, env); diff --git a/goalc/compiler/compilation/Atoms.cpp 
b/goalc/compiler/compilation/Atoms.cpp index fcfd5daad4..0846abb1c4 100644 --- a/goalc/compiler/compilation/Atoms.cpp +++ b/goalc/compiler/compilation/Atoms.cpp @@ -87,6 +87,12 @@ static const std::unordered_map< {".div.vf", &Compiler::compile_asm_div_vf}, {".sqrt.vf", &Compiler::compile_asm_sqrt_vf}, + {".itof.vf", &Compiler::compile_asm_itof_vf}, + {".ftoi.vf", &Compiler::compile_asm_ftoi_vf}, + + {".pw.sll", &Compiler::compile_asm_pw_sll}, + {".pw.srl", &Compiler::compile_asm_pw_srl}, + {".pw.sra", &Compiler::compile_asm_pw_sra}, // BLOCK FORMS {"top-level", &Compiler::compile_top_level}, diff --git a/goalc/compiler/compilation/CompilerControl.cpp b/goalc/compiler/compilation/CompilerControl.cpp index d136ef9925..507a4acc16 100644 --- a/goalc/compiler/compilation/CompilerControl.cpp +++ b/goalc/compiler/compilation/CompilerControl.cpp @@ -182,6 +182,11 @@ Val* Compiler::compile_asm_file(const goos::Object& form, const goos::Object& re printf(" %12s %4.0f", e.first.c_str(), e.second); } printf("\n"); + } else { + auto total_time = total_timer.getMs(); + if (total_time > 10.0) { + fmt::print("[ASM-FILE] {} took {:.2f} ms\n", obj_file_name, total_time); + } } return get_none(); diff --git a/goalc/compiler/compilation/Static.cpp b/goalc/compiler/compilation/Static.cpp index 06b5dfa1da..c2d55304dc 100644 --- a/goalc/compiler/compilation/Static.cpp +++ b/goalc/compiler/compilation/Static.cpp @@ -434,69 +434,9 @@ StaticResult Compiler::compile_static(const goos::Object& form, Env* env) { } if (unquote(args.at(1)).as_symbol()->name == "boxed-array") { - // (new 'static 'boxed-array ...) 
- // get all arguments now - args = get_list_as_vector(rest, &constructor_args); - if (args.size() < 4) { - throw_compiler_error(form, - "new static boxed array must have type and min-size arguments"); - } - auto content_type = parse_typespec(args.at(2)); - s64 min_size; - if (!try_getting_constant_integer(args.at(3), &min_size, env)) { - throw_compiler_error(form, "The length {} is not valid.", args.at(3).print()); - } - s32 length = std::max(min_size, s64(args.size() - 4)); - // todo - generalize this array stuff if we ever need other types of static arrays. - auto pointer_type = m_ts.make_pointer_typespec(content_type); - auto deref_info = m_ts.get_deref_info(pointer_type); - assert(deref_info.can_deref); - assert(deref_info.mem_deref); - auto array_size_bytes = length * deref_info.stride; - // todo, segments - auto obj = std::make_unique(MAIN_SEGMENT, "array"); - obj->data.resize(16 + array_size_bytes); - // 0 - 4 : type tag (set automatically) - // 4 - 8 : length - memcpy(obj->data.data() + 4, &length, 4); - // 8 - 12 allocated length - memcpy(obj->data.data() + 8, &length, 4); - // 12 - 16 content type - obj->add_type_record(content_type.base_type(), 12); - - // now add arguments: - for (size_t i = 4; i < args.size(); i++) { - int arg_idx = i - 4; - int elt_offset = 16 + arg_idx * deref_info.stride; - auto sr = compile_static(args.at(i), env); - if (is_integer(content_type)) { - typecheck(form, TypeSpec("integer"), sr.typespec()); - } else { - typecheck(form, content_type, sr.typespec()); - } - if (sr.is_symbol()) { - assert(deref_info.stride == 4); - obj->add_symbol_record(sr.symbol_name(), elt_offset); - u32 symbol_placeholder = 0xffffffff; - memcpy(obj->data.data() + elt_offset, &symbol_placeholder, 4); - } else if (sr.is_reference()) { - assert(deref_info.stride == 4); - obj->add_pointer_record(elt_offset, sr.reference(), sr.reference()->get_addr_offset()); - } else if (sr.is_constant_data()) { - if (!integer_fits(sr.constant_data(), deref_info.load_size, 
deref_info.sign_extend)) { - throw_compiler_error(form, "The integer {} doesn't fit in element {} of array of {}", - sr.constant_data(), arg_idx, content_type.print()); - } - u64 data = sr.constant_data(); - memcpy(obj->data.data() + elt_offset, &data, deref_info.load_size); - } else { - assert(false); - } - } - auto result = StaticResult::make_structure_reference( - obj.get(), m_ts.make_array_typespec(content_type)); - fie->add_static(std::move(obj)); - return result; + return fill_static_array(form, rest, true, env); + } else if (unquote(args.at(1)).as_symbol()->name == "array") { + return fill_static_array(form, rest, false, env); } else { auto ts = parse_typespec(unquote(args.at(1))); if (ts == TypeSpec("string")) { @@ -520,6 +460,18 @@ StaticResult Compiler::compile_static(const goos::Object& form, Env* env) { throw_compiler_error(form, "Cannot construct a static {}.", ts.print()); } } + } else if (first.is_symbol() && first.as_symbol()->name == "the-as") { + auto args = get_va(form, rest); + va_check(form, args, {{}, {}}, {}); + auto type = parse_typespec(args.unnamed.at(0)); + if (type == TypeSpec("float")) { + s64 value; + if (try_getting_constant_integer(args.unnamed.at(1), &value, env)) { + if (integer_fits(value, 4, false)) { + return StaticResult::make_constant_data(value, TypeSpec("float")); + } + } + } } else { // maybe an enum s64 int_out; @@ -533,6 +485,90 @@ StaticResult Compiler::compile_static(const goos::Object& form, Env* env) { return {}; } +StaticResult Compiler::fill_static_array(const goos::Object& form, + const goos::Object& rest, + bool boxed, + Env* env) { + auto fie = get_parent_env_of_type(env); + // (new 'static 'boxed-array ...) 
+ // get all arguments now + auto args = get_list_as_vector(rest); + if (args.size() < 4) { + throw_compiler_error(form, "new static boxed array must have type and min-size arguments"); + } + auto content_type = parse_typespec(args.at(2)); + s64 min_size; + if (!try_getting_constant_integer(args.at(3), &min_size, env)) { + throw_compiler_error(form, "The length {} is not valid.", args.at(3).print()); + } + s32 length = std::max(min_size, s64(args.size() - 4)); + // todo - generalize this array stuff if we ever need other types of static arrays. + auto pointer_type = m_ts.make_pointer_typespec(content_type); + auto deref_info = m_ts.get_deref_info(pointer_type); + assert(deref_info.can_deref); + assert(deref_info.mem_deref); + auto array_data_size_bytes = length * deref_info.stride; + // todo, segments + std::unique_ptr obj; + if (boxed) { + obj = std::make_unique(MAIN_SEGMENT, "array"); + } else { + obj = std::make_unique(MAIN_SEGMENT); + } + + int array_header_size = boxed ? 16 : 0; + obj->data.resize(array_header_size + array_data_size_bytes); + + if (boxed) { + // 0 - 4 : type tag (set automatically) + // 4 - 8 : length + memcpy(obj->data.data() + 4, &length, 4); + // 8 - 12 allocated length + memcpy(obj->data.data() + 8, &length, 4); + // 12 - 16 content type + obj->add_type_record(content_type.base_type(), 12); + } + + // now add arguments: + for (size_t i = 4; i < args.size(); i++) { + int arg_idx = i - 4; + int elt_offset = array_header_size + arg_idx * deref_info.stride; + auto sr = compile_static(args.at(i), env); + if (is_integer(content_type)) { + typecheck(form, TypeSpec("integer"), sr.typespec()); + } else { + typecheck(form, content_type, sr.typespec()); + } + if (sr.is_symbol()) { + assert(deref_info.stride == 4); + obj->add_symbol_record(sr.symbol_name(), elt_offset); + u32 symbol_placeholder = 0xffffffff; + memcpy(obj->data.data() + elt_offset, &symbol_placeholder, 4); + } else if (sr.is_reference()) { + assert(deref_info.stride == 4); + 
obj->add_pointer_record(elt_offset, sr.reference(), sr.reference()->get_addr_offset()); + } else if (sr.is_constant_data()) { + if (!integer_fits(sr.constant_data(), deref_info.load_size, deref_info.sign_extend)) { + throw_compiler_error(form, "The integer {} doesn't fit in element {} of array of {}", + sr.constant_data(), arg_idx, content_type.print()); + } + u64 data = sr.constant_data(); + memcpy(obj->data.data() + elt_offset, &data, deref_info.load_size); + } else { + assert(false); + } + } + TypeSpec result_type; + if (boxed) { + result_type = m_ts.make_array_typespec(content_type); + } else { + result_type = m_ts.make_pointer_typespec(content_type); + } + auto result = StaticResult::make_structure_reference(obj.get(), result_type); + fie->add_static(std::move(obj)); + return result; +} + Val* Compiler::compile_new_static_bitfield(const goos::Object& form, const TypeSpec& type, const goos::Object& _field_defs, diff --git a/goalc/compiler/compilation/Type.cpp b/goalc/compiler/compilation/Type.cpp index 1189534040..068129265a 100644 --- a/goalc/compiler/compilation/Type.cpp +++ b/goalc/compiler/compilation/Type.cpp @@ -761,7 +761,8 @@ Val* Compiler::compile_static_new(const goos::Object& form, const goos::Object* rest, Env* env) { auto unquoted = unquote(type); - if (unquoted.is_symbol() && unquoted.as_symbol()->name == "boxed-array") { + if (unquoted.is_symbol() && + (unquoted.as_symbol()->name == "boxed-array" || unquoted.as_symbol()->name == "array")) { auto fe = get_parent_env_of_type(env); auto sr = compile_static(form, env); auto result = fe->alloc_val(sr.reference(), sr.typespec()); @@ -884,7 +885,7 @@ Val* Compiler::compile_new(const goos::Object& form, const goos::Object& _rest, return compile_static_new(form, type, rest, env); } else if (allocation == "stack") { return compile_stack_new(form, type, rest, env, true); - } else if (allocation == "stack-no-constructor") { + } else if (allocation == "stack-no-clear") { return compile_stack_new(form, type, 
rest, env, false); } diff --git a/goalc/emitter/IGen.h b/goalc/emitter/IGen.h index a86c8b944a..5bc4eef08b 100644 --- a/goalc/emitter/IGen.h +++ b/goalc/emitter/IGen.h @@ -2339,6 +2339,56 @@ class IGen { instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0b0); return instr; } + + static Instruction itof_vf(Register dst, Register src) { + assert(dst.is_xmm()); + assert(src.is_xmm()); + Instruction instr(0x5b); // VCVTDQ2PS + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0); + return instr; + } + + static Instruction ftoi_vf(Register dst, Register src) { + assert(dst.is_xmm()); + assert(src.is_xmm()); + Instruction instr(0x5b); // VCVTPS2DQ + instr.set_vex_modrm_and_rex(dst.hw_id(), src.hw_id(), VEX3::LeadingBytes::P_0F, 0, false, + VexPrefix::P_66); + return instr; + } + + static Instruction pw_sra(Register dst, Register src, u8 imm) { + assert(dst.is_xmm()); + assert(src.is_xmm()); + // VEX.128.66.0F.WIG 72 /4 ib VPSRAD xmm1, xmm2, imm8 + Instruction instr(0x72); + instr.set_vex_modrm_and_rex(4, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; + } + + static Instruction pw_srl(Register dst, Register src, u8 imm) { + assert(dst.is_xmm()); + assert(src.is_xmm()); + // VEX.128.66.0F.WIG 72 /2 ib VPSRLD xmm1, xmm2, imm8 + Instruction instr(0x72); + instr.set_vex_modrm_and_rex(2, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; + } + + static Instruction pw_sll(Register dst, Register src, u8 imm) { + assert(dst.is_xmm()); + assert(src.is_xmm()); + // VEX.128.66.0F.WIG 72 /6 ib VPSLLD xmm1, xmm2, imm8 + Instruction instr(0x72); + instr.set_vex_modrm_and_rex(6, src.hw_id(), VEX3::LeadingBytes::P_0F, dst.hw_id(), false, + VexPrefix::P_66); + instr.set(Imm(1, imm)); + return instr; + } }; } // namespace emitter diff --git a/goalc/emitter/Register.h 
b/goalc/emitter/Register.h index ee39ce6f51..71150dab6c 100644 --- a/goalc/emitter/Register.h +++ b/goalc/emitter/Register.h @@ -52,14 +52,14 @@ enum X86_REG : s8 { XMM5, XMM6, XMM7, - XMM8, - XMM9, - XMM10, - XMM11, - XMM12, - XMM13, - XMM14, - XMM15, + XMM8, // saved + XMM9, // saved + XMM10, // saved + XMM11, // saved + XMM12, // saved + XMM13, // saved + XMM14, // saved + XMM15, // saved }; class Register { diff --git a/test/decompiler/FormRegressionTest.cpp b/test/decompiler/FormRegressionTest.cpp index 19de874c67..78a0d402ac 100644 --- a/test/decompiler/FormRegressionTest.cpp +++ b/test/decompiler/FormRegressionTest.cpp @@ -111,7 +111,7 @@ std::unique_ptr FormRegressionTest::make_function( test->func.ir2.atomic_ops_succeeded = true; test->func.ir2.env.set_end_var(test->func.ir2.atomic_ops->end_op().return_var()); - EXPECT_TRUE(test->func.run_type_analysis_ir2(function_type, *dts, test->file, hints)); + EXPECT_TRUE(test->func.run_type_analysis_ir2(function_type, *dts, test->file, hints, {})); test->func.ir2.env.set_reg_use(analyze_ir2_register_usage(test->func)); diff --git a/test/decompiler/test_math_decomp.cpp b/test/decompiler/test_math_decomp.cpp index 93573b1b65..67a07ac129 100644 --- a/test/decompiler/test_math_decomp.cpp +++ b/test/decompiler/test_math_decomp.cpp @@ -98,7 +98,7 @@ TEST_F(FormRegressionTest, ExprSeek) { " (set! 
f2-0 (- arg1 arg0))\n" " (cond\n" " ((>= arg2 (fabs f2-0)) arg1)\n" - " ((>= f2-0 0) (+ arg0 arg2))\n" + " ((>= f2-0 0.000000) (+ arg0 arg2))\n" " (else (- arg0 arg2))\n" " )\n" " )"; diff --git a/test/goalc/source_templates/with_game/test-pw-shifts.gc b/test/goalc/source_templates/with_game/test-pw-shifts.gc new file mode 100644 index 0000000000..93c659a04b --- /dev/null +++ b/test/goalc/source_templates/with_game/test-pw-shifts.gc @@ -0,0 +1,28 @@ +(defmacro print-vf-hex (vf) + `(let ((temp (new 'stack 'vector4w))) + (.svf temp ,vf) + (format #t "~X ~X ~X ~X~%" (-> temp data 0) (-> temp data 1) (-> temp data 2) (-> temp data 3)) + ) + ) + +(defun pw-shift-test () + (let ((temp (new 'stack 'vector4w))) + (rlet ((vf1 :class vf) + (vf2 :class vf) + ) + ;;(.lvf vf1 (new 'static 'vector4w :x #xfafffff0 :y #xfbfffff0 :z #xfcfffff0 :w #xfdfffff0)) + (set-vector! temp #xaafffff0 #xbbfffff0 #xccfffff0 #xddfffff0) + (.lvf vf1 temp) + (print-vf-hex vf1) ;; unchanged + (.pw.sra vf2 vf1 2) + (print-vf-hex vf2) ;; should sign extend + (.pw.sll vf1 vf1 4) + (print-vf-hex vf1) ;; original, removing top + (.pw.srl vf2 vf1 2) ;; should make it positive. 
+ (print-vf-hex vf2) + ) + ) + ) + +(pw-shift-test) +0 \ No newline at end of file diff --git a/test/goalc/source_templates/with_game/test-static-array.gc b/test/goalc/source_templates/with_game/test-static-array.gc new file mode 100644 index 0000000000..169d1b5868 --- /dev/null +++ b/test/goalc/source_templates/with_game/test-static-array.gc @@ -0,0 +1,4 @@ +(let ((test-array (new 'static 'array int16 10 1 2 -10))) + (format #t "~d ~d ~d~%" (-> test-array 0) (-> test-array 1) (-> test-array 2)) + 0 + ) \ No newline at end of file diff --git a/test/goalc/source_templates/with_game/test-trig.gc b/test/goalc/source_templates/with_game/test-trig.gc new file mode 100644 index 0000000000..95284214b8 --- /dev/null +++ b/test/goalc/source_templates/with_game/test-trig.gc @@ -0,0 +1,164 @@ + +(defmacro pf (x) + `(format #t "~f~%" ,x) + ) + +(defmacro pfv (x) + `(format #t "~f ~f ~f ~f~%" (-> ,x x) (-> ,x y) (-> ,x z) (-> ,x w)) + ) + +(defmacro pr (x) + `(format #t "~r~%" ,x) + ) + +(defconstant PI_OVER_6 0.523598) +(defconstant PI_OVER_4 0.785398) +(defconstant PI_OVER_3 1.047198) +(defconstant PI_OVER_2 1.570796) + +;; degrees conversion and printing +(format #t "~r~%" (degrees 2.0)) ;; 2.000 +(format #t "~r~%" (degrees -45)) ;; -45.000 + +;; radmod +(pf (radmod 1.2)) ;; 1.200 +(pf (radmod -1.2)) ;; -1.200 +(pf (radmod 4.0)) ;; -2.2831 +(pf (radmod -5.0)) ;; 1.2831 +(format #t "~%") + +;; sin +(pf (sin (degrees 30))) ;; 0.5 +(pf (sin (degrees 90))) ;; 1 +(pf (sin (degrees (- 45)))) ;; -.707 +(pf (sin (degrees (- 60)))) ;; -.866 +(format #t "~%") + +;; sin-rad +(pf (sin-rad PI_OVER_6)) +(pf (sin-rad PI_OVER_2)) +(pf (sin-rad (- PI_OVER_4))) +(pf (sin-rad (- PI_OVER_3))) +(format #t "~%") + +;; vector-sin-rad! +(let ((in (new 'stack 'vector)) + (out (new 'stack 'vector)) + ) + (set-vector! in PI_OVER_6 (- PI_OVER_4) PI_OVER_3 (- PI_OVER_2)) + (vector-sin-rad! 
out in) + (pfv out) + ) +(format #t "~%") + +;; cos-rad (this one is correct) +(pf (cos-rad PI_OVER_6)) +(pf (cos-rad PI_OVER_2)) +(pf (cos-rad (- PI_OVER_4))) +(pf (cos-rad (- PI_OVER_3))) +(format #t "~%") + +;; vector-cos-rad! +(let ((in (new 'stack 'vector)) + (out (new 'stack 'vector)) + ) + (set-vector! in PI_OVER_6 (- PI_OVER_4) PI_OVER_3 (- PI_OVER_2)) + (vector-cos-rad! out in) + (pfv out) + ) +(format #t "~%") + +;; vector-sincos-rad! +(let ((in (new 'stack 'vector)) + (out-sin (new 'stack 'vector)) + (out-cos (new 'stack 'vector)) + ) + (set-vector! in PI_OVER_6 (- PI_OVER_4) PI_OVER_3 (- PI_OVER_2)) + (vector-sincos-rad! out-sin out-cos in) + (pfv out-sin) + (pfv out-cos) + ) +(format #t "~%") + +;; sincos-rad! (has cosine bug) +(let ((out (new 'stack 'array 'float 2))) + (sincos-rad! out PI_OVER_4) + (format #t "~f ~f~%" (-> out 0) (-> out 1)) + (sincos-rad! out (- PI_OVER_2)) + (format #t "~f ~f~%" (-> out 0) (-> out 1)) + ) +(format #t "~%") + +;; sincos! (has cosine bug) +(format #t "sincos!~%") +(let ((out (new 'stack 'array 'float 2))) + (sincos! out (+ 0.0 (degrees 765))) + (format #t "~f ~f~%" (-> out 0) (-> out 1)) + (sincos! out (+ (degrees 750) -0.0)) + (format #t "~f ~f~%" (-> out 0) (-> out 1)) + ) +(format #t "~%") + +;; vector-rad<-vector-deg! +(let ((out (new 'stack 'vector)) + (in (new 'stack 'vector)) + ) + (set-vector! in (degrees 1.0) (degrees 182.0) (degrees -183.0) (degrees 790.0)) + (vector-rad<-vector-deg! out in) + (pfv out) + ) +(format #t "~%") + +;; vector-rad<-vector-deg/2! +(let ((out (new 'stack 'vector)) + (in (new 'stack 'vector)) + ) + (set-vector! in (degrees (* 2.0 1.0)) (degrees (* 2.0 182.0)) (degrees (* 2.0 -183.0)) (degrees (* 2.0 790.0))) + (vector-rad<-vector-deg/2! 
out in) + (pfv out) + ) +(format #t "~%") + +;; tan +(pf (tan (degrees 0))) +(pf (tan (degrees 45.0))) +(pf (tan (degrees -30.0))) +(format #t "~%") + +;; atan-rad +(pf (atan-rad 0.5)) +(pf (atan-rad 0.707)) +(format #t "~%") + +;; atan2-rad +(pf (atan2-rad 1.0 2.0)) +(pf (atan2-rad -2.0 4.0)) +(pf (atan2-rad 1.0 -2.0)) +(pf (atan2-rad -1.0 -2.0)) +(format #t "~%") + +;; exp +(pf (exp 0.0)) +(pf (exp 0.2)) +(pf (exp 1.0)) +(pf (exp 1.3)) +(pf (exp 3.45)) +(pf (exp -1.0)) +(pf (exp -0.5)) +(pf (exp -2.34)) +(format #t "~%") + +;; atan2 +(pr (atan 2.0 4.0)) +(pr (atan -.707 1.0)) +(pr (atan 1.732 -2.0)) +(pr (atan -2.0 -1.0)) +(format #t "~%") + +;; asin +(pr (asin 0.707)) +(pr (asin -0.866)) +(format #t "~%") +0 + + diff --git a/test/goalc/source_templates/with_game/test-vector-int-float-conversions.gc b/test/goalc/source_templates/with_game/test-vector-int-float-conversions.gc new file mode 100644 index 0000000000..bfac379044 --- /dev/null +++ b/test/goalc/source_templates/with_game/test-vector-int-float-conversions.gc @@ -0,0 +1,29 @@ +(defmacro print-vf (vf) + `(let ((temp (new 'stack 'vector))) + (.svf temp ,vf) + (format #t "~f ~f ~f ~f~%" (-> temp x) (-> temp y) (-> temp z) (-> temp w)) + ) + ) + +(defmacro print-vf-hex (vf) + `(let ((temp (new 'stack 'vector4w))) + (.svf temp ,vf) + (format #t "~d ~d ~d ~d~%" (-> temp data 0) (-> temp data 1) (-> temp data 2) (-> temp data 3)) + ) + ) + + +(defun itof-test () + (rlet ((vf1 :class vf) + (vf2 :class vf)) + (.lvf vf1 (new 'static 'vector :x 1.0 :y -2.0 :z 3.0 :w 4.0)) + (.ftoi.vf vf2 vf1) + (print-vf vf1) + (print-vf-hex vf2) + (.itof.vf vf2 vf2) + (print-vf vf2) + ) + ) + +(itof-test) +0 \ No newline at end of file diff --git a/test/goalc/test_with_game.cpp b/test/goalc/test_with_game.cpp index 165f0d855c..5d0c368e6a 100644 --- a/test/goalc/test_with_game.cpp +++ b/test/goalc/test_with_game.cpp @@ -346,6 +346,80 @@ TEST_F(WithGameTests, StaticBoxedArray) { {"4 asdf \"test\" (a b) 0 object 12 12\n0\n"}); } 
+TEST_F(WithGameTests, Trig) { + runner.run_static_test(env, testCategory, "test-trig.gc", + {"2.0000\n" // 2 deg + "-45.0000\n" // -45 deg + "1.2000\n" + "-1.2000\n" + "-2.2831\n" // wrap + "1.2831\n" // wrapped + "\n" + "0.4999\n" // sin + "1.0000\n" + "-0.7071\n" + "-0.8659\n" + "\n" + "0.4999\n" // sin-rads + "1.0000\n" + "-0.7071\n" + "-0.8660\n" + "\n" + "0.4999 -0.7071 0.8660 -1.0000\n" // vector-sin-rad! + "\n" + "0.8660\n" // cos-rads + "0.0000\n" + "0.7071\n" + "0.4999\n" + "\n" + "0.8660 0.7071 0.4999 0.0000\n" // vector-cos-rad + "\n" + "0.4999 -0.7071 0.8660 -1.0000\n" // vector-sincos + "0.8660 0.7071 0.4999 0.0000\n" + "\n" + "0.7071 0.7082\n" // sincos with cosine bug + "-1.0000 0.0047\n" // sincos, with cosine bug + "\n" + "sincos!\n" + "0.7071 0.7082\n" // also with cosine bugs + "0.4999 0.8665\n" + "\n" + "0.0174 -3.1066 3.0892 1.2217\n" // vector-rad<-vector deg + "\n" + "0.0174 -3.1066 3.0892 1.2217\n" // with div/2 + "\n" + "0.0000\n" // tan + "1.0000\n" + "-0.5773\n" + "\n" + "0.4636\n" // atan-rad + "0.6154\n" + "\n" + "0.4636\n" // atan2 + "-0.4636\n" + "2.6779\n" + "-2.6779\n" + "\n" + "1.0000\n" // exp + "1.2214\n" + "2.7182\n" + "3.6692\n" + "31.5003\n" + "0.3678\n" + "0.6065\n" + "0.0963\n" + "\n" + "26.5650\n" + "-35.2603\n" + "139.1074\n" + "-116.5650\n" + "\n" + "44.9913\n" + "-59.9970\n" + "\n" + "0\n"}); +} + // VECTOR FLOAT TESTS // ---- One off Tests @@ -385,6 +459,29 @@ TEST_F(WithGameTests, ShortCircuit) { get_test_pass_string("short-circuit", 13)); } +TEST_F(WithGameTests, VectorFloatToInt) { + runner.run_static_test(env, testCategory, "test-vector-int-float-conversions.gc", + {"1.0000 -2.0000 3.0000 4.0000\n" + "1 -2 3 4\n" + "1.0000 -2.0000 3.0000 4.0000\n" + "0\n"}); +} + +TEST_F(WithGameTests, PWShifts) { + runner.run_static_test(env, testCategory, "test-pw-shifts.gc", + {"ffffffffaafffff0 ffffffffbbfffff0 ffffffffccfffff0 ffffffffddfffff0\n" + "ffffffffeabffffc ffffffffeefffffc fffffffff33ffffc fffffffff77ffffc\n" + 
"ffffffffafffff00 ffffffffbfffff00 ffffffffcfffff00 ffffffffdfffff00\n" + "2bffffc0 2fffffc0 33ffffc0 37ffffc0\n" + "0\n"}); +} + +TEST_F(WithGameTests, StaticArray) { + runner.run_static_test(env, testCategory, "test-static-array.gc", + {"1 2 -10\n" + "0\n"}); +} + TEST(TypeConsistency, TypeConsistency) { Compiler compiler; compiler.enable_throw_on_redefines(); diff --git a/test/test_emitter_avx.cpp b/test/test_emitter_avx.cpp index 96ca85595c..78b204166c 100644 --- a/test/test_emitter_avx.cpp +++ b/test/test_emitter_avx.cpp @@ -322,3 +322,53 @@ TEST(EmitterAVX, RIP) { tester.emit(IGen::loadvf_rip_plus_s32(XMM0 + 13, -123)); EXPECT_EQ(tester.dump_to_hex_string(true), "C5F8281D85FFFFFFC578282D85FFFFFF"); } + +TEST(EmitterAVX, ITOF) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::itof_vf(XMM0 + 3, XMM0 + 4)); + tester.emit(IGen::itof_vf(XMM0 + 3, XMM0 + 14)); + tester.emit(IGen::itof_vf(XMM0 + 13, XMM0 + 4)); + tester.emit(IGen::itof_vf(XMM0 + 13, XMM0 + 14)); + EXPECT_EQ(tester.dump_to_hex_string(true), "C5F85BDCC4C1785BDEC5785BECC441785BEE"); +} + +TEST(EmitterAVX, FTOI) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::ftoi_vf(XMM0 + 3, XMM0 + 4)); + tester.emit(IGen::ftoi_vf(XMM0 + 3, XMM0 + 14)); + tester.emit(IGen::ftoi_vf(XMM0 + 13, XMM0 + 4)); + tester.emit(IGen::ftoi_vf(XMM0 + 13, XMM0 + 14)); + EXPECT_EQ(tester.dump_to_hex_string(true), "C5F95BDCC4C1795BDEC5795BECC441795BEE"); +} + +TEST(EmitterAVX, VPSRAD) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::pw_sra(XMM0 + 3, XMM0 + 4, 3)); + tester.emit(IGen::pw_sra(XMM0 + 3, XMM0 + 14, 4)); + tester.emit(IGen::pw_sra(XMM0 + 13, XMM0 + 4, 5)); + tester.emit(IGen::pw_sra(XMM0 + 13, XMM0 + 14, 6)); + EXPECT_EQ(tester.dump_to_hex_string(true), "C5E172E403C4C16172E604C59172E405C4C11172E606"); +} + +TEST(EmitterAVX, VPSRLD) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::pw_srl(XMM0 + 3, XMM0 + 4, 3)); + 
tester.emit(IGen::pw_srl(XMM0 + 3, XMM0 + 14, 4)); + tester.emit(IGen::pw_srl(XMM0 + 13, XMM0 + 4, 5)); + tester.emit(IGen::pw_srl(XMM0 + 13, XMM0 + 14, 6)); + EXPECT_EQ(tester.dump_to_hex_string(true), "C5E172D403C4C16172D604C59172D405C4C11172D606"); +} + +TEST(EmitterAVX, VPSLLD) { + CodeTester tester; + tester.init_code_buffer(1024); + tester.emit(IGen::pw_sll(XMM0 + 3, XMM0 + 4, 3)); + tester.emit(IGen::pw_sll(XMM0 + 3, XMM0 + 14, 4)); + tester.emit(IGen::pw_sll(XMM0 + 13, XMM0 + 4, 5)); + tester.emit(IGen::pw_sll(XMM0 + 13, XMM0 + 14, 6)); + EXPECT_EQ(tester.dump_to_hex_string(true), "C5E172F403C4C16172F604C59172F405C4C11172F606"); +} \ No newline at end of file