Adding additional quick start guides and adding boundary calculating script (#244)

* Adding script to automate config file updates

* Adding doc covering basics of decomp.me

* Adding doc covering m2c basics

* Updating Ghidra doc with note about special inline functions

* Updating README with links to new docs
This commit is contained in:
Hexalotl
2024-02-07 23:14:13 -08:00
committed by GitHub
parent 01d00d834c
commit 37523c19b9
12 changed files with 558 additions and 94 deletions
+2
View File
@@ -57,7 +57,9 @@ Use `--recursive` when cloning to have ppcdis in the repository.
- [Dumping Game Files](./docs/extract_game.md)
- [Ghidra Setup](./docs/ghidra_setup.md)
- [Generating Decomp Context](./docs/generating_decomp_context.md)
- [decomp.me Basics](./docs/decomp_me_basics.md)
- [Ghidra Basics](./docs/ghidra_basics.md)
- [m2c Basics](./docs/m2c_basics.md)
- [Decomp Basics](./docs/decomp_basics.md)
## Credits
+39 -39
View File
@@ -148,6 +148,11 @@ config/rel.yml:
addrs: [0x80650880, 0x806508C0]
type: vtx
# m_msg
wipe1_v:
addrs: [0x80652AD0, 0x80652C60]
type: vtx
g_wipe1_txt:
addrs: [0x80652C60, 0x80653460]
msg/con_kaiwa2_w1_tex:
addrs: [0x80657360, 0x80657B60]
msg/con_kaiwa2_w2_tex:
@@ -167,11 +172,40 @@ config/rel.yml:
addrs: [0x8065FD4C, 0x80674F90]
aBTD_island_ldr:
addrs: [0x80674F90, 0x806817CC]
wipe1_v:
addrs: [0x80652AD0, 0x80652C60]
type: vtx
g_wipe1_txt:
addrs: [0x80652C60, 0x80653460]
aKOI_obj_e_koinobori_a_pal:
addrs: [0x806C5900, 0x806C5920]
type: pal16
obj_e_koinobori_b_pal:
addrs: [0x806C5920, 0x806C5940]
type: pal16
# ac_lotus
aLOT_obj_01_lotus_pal:
addrs: [0x806C59E0, 0x806C5A00]
type: pal16
obj_02_lotus_pal:
addrs: [0x806C5A00, 0x806C5A20]
type: pal16
obj_03_lotus_pal:
addrs: [0x806C5A20, 0x806C5A40]
type: pal16
obj_04_lotus_pal:
addrs: [0x806C5A40, 0x806C5A60]
type: pal16
obj_05_lotus_pal:
addrs: [0x806C5A60, 0x806C5A80]
type: pal16
obj_06_lotus_pal:
addrs: [0x806C5A80, 0x806C5AA0]
type: pal16
obj_07_lotus_pal:
addrs: [0x806C5AA0, 0x806C5AC0]
type: pal16
obj_08_lotus_pal:
addrs: [0x806C5AC0, 0x806C5AE0]
type: pal16
obj_09_lotus_pal:
addrs: [0x806C5AE0, 0x806C5B00]
type: pal16
mFM_beach_pal_0:
addrs: [0x80C59CA8, 0x80C59CC8]
type: pal16
@@ -356,37 +390,3 @@ config/rel.yml:
addrs: [0x80F8C448, 0x80F8C468]
type: pal16
# ac_koinobori
aKOI_obj_e_koinobori_a_pal:
addrs: [0x806C5900, 0x806C5920]
type: pal16
obj_e_koinobori_b_pal:
addrs: [0x806C5920, 0x806C5940]
type: pal16
# ac_lotus
aLOT_obj_01_lotus_pal:
addrs: [0x806C59E0, 0x806C5A00]
type: pal16
obj_02_lotus_pal:
addrs: [0x806C5A00, 0x806C5A20]
type: pal16
obj_03_lotus_pal:
addrs: [0x806C5A20, 0x806C5A40]
type: pal16
obj_04_lotus_pal:
addrs: [0x806C5A40, 0x806C5A60]
type: pal16
obj_05_lotus_pal:
addrs: [0x806C5A60, 0x806C5A80]
type: pal16
obj_06_lotus_pal:
addrs: [0x806C5A80, 0x806C5AA0]
type: pal16
obj_07_lotus_pal:
addrs: [0x806C5AA0, 0x806C5AC0]
type: pal16
obj_08_lotus_pal:
addrs: [0x806C5AC0, 0x806C5AE0]
type: pal16
obj_09_lotus_pal:
addrs: [0x806C5AE0, 0x806C5B00]
type: pal16
+50 -50
View File
@@ -34,8 +34,6 @@ m_banti.c:
.rodata: [0x80641388, 0x806413E0]
.data: [0x8064F548, 0x8064F658]
.bss: [0x8125A830, 0x8125AC80]
m_bg_tex.c:
.bss: [0x8125AC80, 0x81263080]
m_bg_item.c:
.text: [0x80378858, 0x803789F8]
.rodata: [0x806413E0, 0x806413F0]
@@ -61,7 +59,7 @@ m_cockroach.c:
m_collision_obj.c:
.text: [0x80394518, 0x80395B24]
.rodata: [0x80641CE8, 0x80641D20]
.data: [0x80651208,0x806512D8]
.data: [0x80651208, 0x806512D8]
.bss: [0x812663D0, 0x81266400]
m_common_data.c:
.text: [0x80395B24, 0x80395BE8]
@@ -100,9 +98,9 @@ m_fbdemo_triforce.c:
.text: [0x803A0ABC, 0x803A0F00]
.rodata: [0x80641F60, 0x80641FA0]
m_fbdemo_wipe1.c:
.text: [0x803A0F00, 0x803A12F8]
.rodata: [0x80641FA0, 0x80641FC0]
.data: [0x80652AD0, 0x80653558]
.text: [0x803A0F00, 0x803A12F8]
.rodata: [0x80641FA0, 0x80641FC0]
.data: [0x80652AD0, 0x80653558]
m_fbdemo_fade.c:
.text: [0x803A12F8, 0x803A1508]
.rodata: [0x80641FC0, 0x80641FD8]
@@ -315,8 +313,6 @@ m_view.c:
m_roll_lib.c:
.text: [0x803F6570, 0x803F76F0]
.rodata: [0x806434D8, 0x80643538]
sys_stacks.c:
.bss: [0x812F5670, 0x812F9670]
m_cpak.c:
.text: [0x80403830, 0x80403874]
.data: [0x8065EC98, 0x8065ECA0]
@@ -395,7 +391,7 @@ ac_animal_logo_misc.c:
.text: [0x804117D4, 0x80411A60]
ac_ball.c:
.text: [0x80411F64, 0x80413DD4]
.rodata: [0x80643A90,0x80643B60]
.rodata: [0x80643A90, 0x80643B60]
.data: [0x8065FBF8, 0x8065FC58]
.bss: [0x812F96E0, 0x812F96E8]
ac_birth_control.c:
@@ -430,22 +426,13 @@ ac_haniwa.c:
.text: [0x80427624, 0x80428F64]
.rodata: [0x806440B8, 0x806440F8]
.data: [0x80683D08, 0x80683E98]
ac_koinobori.c:
.text: [0x805B27B8, 0x805B2AE0]
.data: [0x806C58A0, 0x806C5940]
.rodata: [0x8064A978, 0x8064A990]
ac_lotus.c:
.text: [0x805B2AE0, 0x805B3010]
.rodata: [0x8064A990, 0x8064A9B0]
.data: [0x806C5940, 0x806C5B58]
.bss: [0x81327F48, 0x81327F68]
ac_psnowman.c:
.text: [0x80484098, 0x80484694]
.rodata: [0x80644C30, 0x80644C60]
.data: [0x8068A458, 0x8068A480]
ac_rope.c:
.text: [0x804967A4, 0x80496AB8]
.rodata: [0x80644DB0,0x80644DB8]
.rodata: [0x80644DB0, 0x80644DB8]
.data: [0x8068BB80, 0x8068BBE0]
ac_set_manager.c:
.text: [0x80496AB8, 0x80496F50]
@@ -527,14 +514,14 @@ ac_t_rei1.c:
ac_t_rei2.c:
.text: [0x804A99AC, 0x804A9B00]
.data: [0x8068EF38, 0x8068EF78]
ac_t_tumbler.c:
.text: [0x804A9CC4, 0x804A9F24]
.rodata: [0x80645F10, 0x80645F18]
.data: [0x8068EFE0, 0x8068F040]
ac_t_tama.c:
.text: [0x804A9B00, 0x804A9CC4]
.data: [0x8068EF78, 0x8068EFE0]
.rodata: [0x80645F00, 0x80645F10]
ac_t_tumbler.c:
.text: [0x804A9CC4, 0x804A9F24]
.rodata: [0x80645F10, 0x80645F18]
.data: [0x8068EFE0, 0x8068F040]
ac_t_umbrella.c:
.text: [0x804A9F24, 0x804AA4C8]
.data: [0x8068F040, 0x8068F310]
@@ -547,7 +534,7 @@ ac_t_zinnia1.c:
.text: [0x804AA72C, 0x804AA880]
.data: [0x8068F370, 0x8068F3B0]
ac_t_zinnia2.c:
.text: [0x804AA880, 0x804AA9d4]
.text: [0x804AA880, 0x804AA9D4]
.data: [0x8068F3B0, 0x8068F3F0]
ac_tools.c:
.text: [0x804AC034, 0x804AC2D8]
@@ -583,8 +570,8 @@ ef_room_sunshine_museum.c:
.rodata: [0x806464B8, 0x80646508]
.data: [0x8069C0A0, 0x8069C0C8]
ef_room_sunshine_minsect.c:
.text: [0x804D0F3C,0x804D1BBC]
.rodata: [0x80646508,0x80646558]
.text: [0x804D0F3C, 0x804D1BBC]
.rodata: [0x80646508, 0x80646558]
.data: [0x8069C0C8, 0x8069C0F0]
.bss: [0x81300BD0, 0x81300BD8]
m_huusui_room_ovl.c:
@@ -608,6 +595,9 @@ ac_fuusen.c:
m_mail_check_ovl.c:
.text: [0x8050F06C, 0x8050F838]
.data: [0x8069F320, 0x8069FA40]
ac_dummy.c:
.text: [0x8050F838, 0x8050F848]
.data: [0x8069FA40, 0x8069FA68]
m_all_grow_ovl.c:
.text: [0x8050F848, 0x80515340]
.rodata: [0x80649098, 0x80649110]
@@ -660,7 +650,7 @@ ac_npc_engineer.c:
ac_npc_rtc.c:
.text: [0x80573044, 0x80574134]
.rodata: [0x80649A08, 0x80649A40]
.data: [0x806BF648,0x806BF788]
.data: [0x806BF648, 0x806BF788]
ac_npc_sendo.c:
.text: [0x80574134, 0x80576468]
.rodata: [0x80649A40, 0x80649A58]
@@ -668,7 +658,7 @@ ac_npc_sendo.c:
.bss: [0x8131B258, 0x8131B298]
ac_ev_majin.c:
.text: [0x80592A40, 0x80593158]
.rodata: [0x80649D98,0x80649DA0]
.rodata: [0x80649D98, 0x80649DA0]
.data: [0x806C2B50, 0x806C2BD0]
ac_boat.c:
.text: [0x805A6CF4, 0x805A856C]
@@ -680,24 +670,30 @@ ac_douzou.c:
.data: [0x806C4DF0, 0x806C5018]
ac_dump.c:
.text: [0x805AE704, 0x805AECE8]
.rodata: [0x8064A7E8,0x8064A808]
.rodata: [0x8064A7E8, 0x8064A808]
.data: [0x806C5018, 0x806C5120]
ac_dummy.c:
.text: [0x8050F838, 0x8050F848]
.data: [0x8069FA40, 0x8069FA68]
ac_kago.c:
.text: [0x805B1A08, 0x805B1D50]
.data: [0x806C5750, 0x806C57A8]
ac_koinobori.c:
.text: [0x805B27B8, 0x805B2AE0]
.data: [0x806C58A0, 0x806C5940]
.rodata: [0x8064A978, 0x8064A990]
ac_lotus.c:
.text: [0x805B2AE0, 0x805B3010]
.rodata: [0x8064A990, 0x8064A9B0]
.data: [0x806C5940, 0x806C5B58]
.bss: [0x81327F48, 0x81327F68]
ac_mikuji.c:
.text: [0x805B414C, 0x805B44C4]
.data: [0x806C5C10, 0x806C5CA0]
ac_nameplate.c:
.text: [0x805B63FC,0x805B65C4]
.data: [0x806C6110,0x806C6138]
.text: [0x805B63FC, 0x805B65C4]
.data: [0x806C6110, 0x806C6138]
ac_radio.c:
.text: [0x805B887C,0x805B8C7C]
.rodata: [0x8064AB58,0x8064AB68]
.data: [0x806C6558,0x806C65A0]
.text: [0x805B887C, 0x805B8C7C]
.rodata: [0x8064AB58, 0x8064AB68]
.data: [0x806C6558, 0x806C65A0]
ac_shrine.c:
.text: [0x805BA4D8, 0x805BB8D0]
.rodata: [0x8064ABD0, 0x8064AC18]
@@ -714,7 +710,7 @@ ac_tama.c:
.data: [0x806C7110, 0x806C7140]
ac_toudai.c:
.text: [0x805BEA00, 0x805BFC28]
.rodata: [0x8064ACB0,0x8064AD28]
.rodata: [0x8064ACB0, 0x8064AD28]
.data: [0x806C7200, 0x806C72D0]
ac_train0.c:
.text: [0x805BFC28, 0x805C0614]
@@ -788,18 +784,16 @@ m_passwordChk_ovl.c:
.data: [0x806D1D18, 0x806D1D50]
.bss: [0x813413D0, 0x813413F8]
ac_weather.c:
.text: [0x8060193C, 0x80602E70]
.rodata: [0x8064BAE8, 0x8064BB08]
.data: [0x806D1D50, 0x806D1DA0]
ac_weather_fine.c:
.data: [0x806D1DA0, 0x806D1DB8]
.text: [0x8060193C, 0x80602E70]
.rodata: [0x8064BAE8, 0x8064BB08]
.data: [0x806D1D50, 0x806D1DA0]
ac_weather_rain.c:
.text: [0x80602E70, 0x80603494]
.rodata: [0x8064BB08,0x8064BB30]
.rodata: [0x8064BB08, 0x8064BB30]
.data: [0x806D1DB8, 0x806D1DF8]
ac_weather_snow.c:
.text: [0x80603494, 0x80603B44]
.rodata: [0x8064BB30, 0x8064BB88]
.rodata: [0x8064BB30, 0x8064BB88]
.data: [0x806D1DF8, 0x806D1E10]
ac_weather_sakura.c:
.text: [0x80603B44, 0x8060420C]
@@ -817,10 +811,8 @@ first_game.c:
.text: [0x80629CA8, 0x80629D4C]
sys_romcheck.c:
.text: [0x80629D4C, 0x80629D8C]
sys_dynamic.c:
.bss: [0x813413F8, 0x81361820]
m_play.c:
.text: [0x80629D8C,0x8062B630]
.text: [0x80629D8C, 0x8062B630]
.rodata: [0x8064D1B8, 0x8064D1C0]
.data: [0x806D46D0, 0x806D4958]
.bss: [0x81361820, 0x8148DA60]
@@ -849,8 +841,10 @@ m_prenmi.c:
audio.c:
.text: [0x8062DC04, 0x8062E96C]
.rodata: [0x8064D340, 0x8064D360]
.data: [0x806D4CB0,0x806D4D40]
.bss: [0x8148DA78,0x8148DA7C]
.data: [0x806D4CB0, 0x806D4D40]
.bss: [0x8148DA78, 0x8148DA7C]
ac_weather_fine.c:
.data: [0x806D1DA0, 0x806D1DB8]
# dataobject.obj files
data/combi/data_combi.c:
@@ -881,3 +875,9 @@ data/field/bg/earth_pal.c:
.data: [0x80C90100, 0x80C90280]
data/field/bg/rail_pal.c:
.data: [0x80F8C2C8, 0x80F8C460]
m_bg_tex.c:
.bss: [0x8125AC80, 0x81263080]
sys_stacks.c:
.bss: [0x812F5670, 0x812F9670]
sys_dynamic.c:
.bss: [0x813413F8, 0x81361820]
+45 -3
View File
@@ -1,7 +1,32 @@
# Decompilation Basics And Tips
## Determining Slice Boundaries
When adding in a new Translation Unit (TU) you currently need to manually calculate the addresses for the TU's section boundaries. You can determine the boundaries for a new TU by using the [symbol map files extracted from the game](./extract_game.md) and adding an offset to the addresses.
## Adding TU Boundaries And Asset Boundaries With TU Config Tool
Adding boundaries for the binary sections and assets of a given TU can be done by using the [TU Config script](../tools/tu_config.py) inside of the tools folder. Using this tool allows you to more quickly add in the address boundaries for each section of a TU and optionally add in any assets to the asset config file.
To use this tool follow the steps below:
1. Run the command:
``` console
python3 ./tools/tu_config.py
```
2. Type the name of the TU you want to add to the config files.
3. If it detects that the TU had data assets it will prompt you if you would like to add them to the assets config file.
4. For each data symbol you will be given the option to add them to the config file or not.
5. If you are adding the symbol to the config file it will optionally ask you if you know the data type.
6. After the tool has finished, run the [configure script](../configure.py).
A list of tool parameters can be found below:
| Argument | Description |
|------------------------|------------------------------------------------------------------------------------------------|
| `--symbol-map` | Path to the [symbol map](./extract_game.md). |
| `--binary-slices-file` | Path to the binary slices config file. Defaults to [rel_slices.yml](../config/rel_slices.yml). |
| `--asset-slices-file` | Path to the asset slices config file. Defaults to [assets.yml](../config/assets.yml). |
## Determining Slice Boundaries (Manual)
If you do not use the TU Config tool, you will need to manually add in the slice boundaries to the config file. You can determine the boundaries for a new TU by using the [symbol map files extracted from the game](./extract_game.md) and adding an offset to the addresses.
If using the symbol map, search for `<TU_NAME>.o` to find each applicable section, its start address and end address (usually the address of the next address with a different TU name attached to it). Note that some TUs may or may not have certain sections. You can determine this by searching through the symbol map and noting which sections are found.
@@ -48,6 +73,15 @@ Once the boundaries have been determined, paste them into the [slices file](../c
> .text: [0x8050F838, 0x8050F848]
>~~~
## Determining Asset Boundaries
We declare asset data such as textures and palettes in the [assets config file](../config/assets.yml) and include them into the C file in which they are referenced. This process follows similar steps as above where a new entry for each data object is declared in the config file using the starting and ending address range. You can include the data type if it is known. Optionally this step can be done with the [TU Config tool](../tools/tu_config.py) instead of manually updating the file.
Once the data address range has been added to the config file, you can add it to the C source file using an `#include` statement following the format of `#include "assets/OBJECT_NAME"` where `OBJECT_NAME` is the name of the data object.
> :warning: Due to how the configure script scans through files, if you used `.c_inc` files you currently need to "hint" to the configure script that these files are referenced by using them in the root C file. An example can be found in [`ac_lotus.c`](../src/ac_lotus.c) and [`ac_lotus_draw.c_inc`](../src/ac_lotus_draw.c_inc)
After the steps above have been completed, run the [configure script](../configure.py).
## Generating Assembly Text File
To use sites such as [decomp.me](https://decomp.me) or [m2c](https://simonsoftware.se/other/m2c.html) you will need to paste in the assembly code you wish to match. The easiest way to get the assembly is by first generating an assembly text file with symbols included. To create this file run the following command at the root of the repository:
@@ -56,4 +90,12 @@ To use sites such as [decomp.me](https://decomp.me) or [m2c](https://simonsoftwa
python3 tools/ppcdis/disassembler.py config/rel.yml build/rel_labels.pickle build/rel_relocs.pickle rel.s -m config/symbols.yml
~~~
This will generate a `rel.s` file. Once generated open the file and search for the name of the function you wish to match and copy the assembly listed in the file for that function.
This will generate a `rel.s` file. Once generated open the file and search for the name of the function you wish to match and copy the assembly listed in the file for that function.
### Copying Function Assembly
To copy the assembly for a specific function, follow the steps below:
1. Open the generated `rel.s` file.
2. Search for the name of the function. Search for the first line with the format of `.global FUNCTION_NAME` where `FUNCTION_NAME` is the name of the function you are searching for.
3. Search for the line at the bottom of the assembly code block following the format `.size FUNCTION_NAME, . - FUNCTION_NAME` where `FUNCTION_NAME` is the name of the function you are searching for.
4. Select all of the lines between those two lines, including the two lines themselves.
5. Paste the copied assembly into the tool of your choice.
+26
View File
@@ -0,0 +1,26 @@
# Decomp.me Basics
[decomp.me](https://decomp.me/) is an online collaborative tool that allows reverse engineers to match functions against the original code and share their progress with others.
## Creating A Scratch
A "scratch" refers to a WIP playground that you can use to iterate on a function or functions until you've reached a match.
In order to create a new scratch, follow the steps below:
1. Open [decomp.me](https://decomp.me/).
2. Login or register for a new account.
3. Click on `New scratch` in the top-right corner of the page.
4. Select `Gamecube/Wii` as the target platform.
5. Under the compiler section select from the "preset" dropdown either `Animal Crossing (REL)` or `Animal Crossing (DOL)` depending on where the function you are reverse engineering is located. For most game-specific code you will be using the `REL` option.
![decomp.me compiler settings](./doc_assets/decomp_me_compiler_settings.png)
6. Copy the assembly for the function you want to match [from the assembly file you generated](decomp_basics.md).
7. Copy the context for the function from the [context file you generated for it](./generating_decomp_context.md).
8. Press the `Create Scratch` button.
## Matching
After your scratch has been created you can begin reverse engineering and writing your own C code. You can use tools such as [m2c](./m2c_basics.md) or [Ghidra](ghidra_basics.md) to help assist you. As you make modifications to your scratch you can see how close you are to matching the function.
Some important notes when matching:
* Variable ordering matters. If you're finding that you're stuck on the last percent or two of a function, try re-arranging the declaration of your variables.
* What you write may affect the assembly that comes before or after it. You may find that as you fill out an earlier/later section your function matching can increase.
* Only include what you need in your pasted in context. If you include other functions/data you may find that it adds to the "matched" code. In these cases you may need to only forward-declare the respective function(s) and data.
Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 KiB

+8 -1
View File
@@ -85,4 +85,11 @@ In the above case, `mNW_original_tex_c` is aligned to `32` bytes. However, when
![Struct Editor Dialog](./doc_assets/ghidra_struct_editor_dialog.png)
6. Hit the save button near the top of the dialog, or close the dialog and press "Yes" when prompted to save the changes to the struct.
6. Hit the save button near the top of the dialog, or close the dialog and press "Yes" when prompted to save the changes to the struct.
Please note that if you re-import an entire context file any previous modifications to structs will be discarded. It is strongly recommended that after doing an initial import that you import any new functions and structs on a case-by-case basis. Otherwise, remember to re-apply the alignment changes.
## Inline Functions
While browsing through decompiled code in Ghidra you may come across calls to functions with names following a pattern of `Fun_XXXX`. The functions correspond to the built-in saved register functions and should be inlined and have a `void` return type in order to give a more correct decompilation. It is also recommended to change the name of the function to `FUNCTION_NAME` or another consistent name format to help you keep track of which functions you've made modifications to.
![Inline Func Settings](./doc_assets/ghidra_inline_register_function.png)
+16
View File
@@ -0,0 +1,16 @@
# M2C Basics
m2c is another decompilation tool that can be used to reverse engineer functions. It is available as both a [web tool](https://simonsoftware.se/other/m2c.html) and a [standalone tool](https://github.com/matt-kempster/m2c).
## Using M2C (Web)
In order to use m2c you will need to [generate, and then copy the assembly](./decomp_basics.md) for the function you'd like to reverse engineer and also [generate and copy the context](./generating_decomp_context.md) for the function. When generating context, make sure that the `--m2c` flag is used as m2c does not support preprocessor statements and requires additional cleanup for certain formatting issues. Make sure to delete any unnecessary data or functions if generating context from an in-progress C file.
Below the `Existing C source` section, make sure that the `Compiler and Language` setting is set to `PPC, MWW, C`:
![m2c compiler settings](./doc_assets/m2c_compiler_settings.png)
Click on the `Decompile` button and m2c will generate its closest approximation of reverse-engineered C code.
## Hinting Function Arguments
In order to have a slightly more human-readable decompilation generated it is recommended that you forward-declare the function that you are reverse engineering at the bottom of the `Existing C source` section. This just allows `m2c` to more accurately generate C code.
In some cases where "fake" inheritance is used such as with actors, it may be more useful to forward-declare the function with the "inherited" struct argument type (such as `STRUCTURE_ACTOR` instead of just `ACTOR`) so that any struct-specific named variables can be used instead of defaulting to using memory offsets.
+2 -1
View File
@@ -7,4 +7,5 @@ python-Levenshtein
watchdog
pyjkernel
pcpp
pyperclip
pyperclip
ruamel.yaml
+370
View File
@@ -0,0 +1,370 @@
import os
import re
import argparse
from re import Match
from io import TextIOWrapper
from ruamel.yaml import YAML
from ruamel.yaml import CommentedMap
from ruamel.yaml import CommentedSeq
from ruamel.yaml import scalarint
#region Types
class SymbolInfo:
    """A named symbol together with the address range it occupies."""

    # Class-level defaults; __init__ replaces all three on every instance.
    symbol_name: str = None
    start_address: int = 0
    end_address: int = 0

    def __init__(self, name:str, start:int, size:int) -> None:
        """Store the name and start address and derive the end as ``start + size``."""
        self.symbol_name, self.start_address = name, start
        self.end_address = start + size

    def get_address_range(self)->tuple[int, int]:
        """Return the ``(start_address, end_address)`` pair of this symbol."""
        return (self.start_address, self.end_address)
class SliceSection:
    """One section entry of a translation unit plus the symbols gathered under it."""

    # Class-level placeholders; __init__ assigns the real per-instance values.
    section_symbol: SymbolInfo = None
    symbols: list[SymbolInfo] = None

    def __init__(self, symbol: SymbolInfo) -> None:
        """Start a section described by *symbol* with an empty symbol list."""
        self.symbols = []
        self.section_symbol = symbol
class SliceInfo:
    """The ordered list of sections gathered for one slice of a translation unit."""

    # Class-level placeholder; __init__ gives each instance its own list.
    sections: list[SliceSection] = None

    def __init__(self) -> None:
        """Create a slice with no sections yet."""
        self.sections = []

    def get_address_range(self)->tuple[int, int]:
        """Return the ``(start, end)`` addresses spanned by the gathered sections.

        The start is the start address of the first section's own symbol. The
        end is the end address of the last symbol in the last section, or the
        end address of that section's own symbol when it holds no symbols.
        Raises IndexError when no sections have been added.
        """
        first_section = self.sections[0]
        last_section = self.sections[-1]
        if last_section.symbols:
            range_end = last_section.symbols[-1].end_address
        else:
            range_end = last_section.section_symbol.end_address
        return first_section.section_symbol.start_address, range_end
class Address_Sort_Entry:
    """Pairs one mapping entry (key and value) with the address used to order it."""

    # Class-level placeholders; __init__ assigns the real per-instance values.
    key : str = None
    value: CommentedMap = None
    starting_address: int = None

    def __init__(self, entry_key: str, entry_value: CommentedMap, entry_starting_address: int) -> None:
        """Remember the entry's key, its value and the address it sorts by."""
        self.key, self.value = entry_key, entry_value
        self.starting_address = entry_starting_address
#endregion
#region Constants
# Offsets that are added to the addresses read from the map, keyed by section name
address_offset_map : dict[str, int] = {
    ".text": 0x803702A8,
    ".rodata": 0x80641260,
    ".data": 0x8064D500,
    ".bss": 0x8125A7C0,
}
# Section keys in the order they are consulted when picking the address an entry sorts by
prioritized_addresses: list[str] = [".text", ".rodata", ".data", ".bss"]
# Default input/output locations, resolved relative to the directory above this script
script_dir: str = os.path.dirname(os.path.realpath(__file__))
root_dir: str = os.path.abspath(os.path.join(script_dir, ".."))
default_map_path: str = os.path.join(root_dir, "dump/foresta.map")
default_binary_slice_file_path: str = os.path.join(root_dir, "config/rel_slices.yml")
default_asset_slice_file_path: str = os.path.join(root_dir, "config/assets.yml")
# Map line layout: group 1 = address offset, group 2 = size, group 3 = third hex column,
# group 4/5 = fourth hex column and symbol name (both None on "entry of .data" marker lines),
# group 6 = translation unit object name
specific_tu_pattern_format = r"\s*([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+(?:([0-9a-fA-F]+)\s+(.+?)|\.\.\.data\.\d \(entry of \.data\))\s+({tu_name})\s*"
# Raw string so that "\." reaches the regex engine instead of being an invalid string escape
general_symbol_pattern = re.compile(specific_tu_pattern_format.format(tu_name = r".+\.o"))
slice_boundary_format = "[{start_address}, {end_address}]"
#endregion
#region Sorting
def sort_by_starting_address(data: CommentedMap, address_sort_keys: list[str])->CommentedMap:
    """Return the entries of *data* ordered by their starting address.

    For every entry the first key of *address_sort_keys* that the entry
    contains is looked up. When the value found is a sequence (the config
    files store ``[start, end]`` pairs) its first element is the sort address;
    any other value is used as-is. Entries that are empty or contain none of
    the keys sort with address 0, so they come first instead of raising a
    TypeError from comparing an int with a sequence.

    The sort is stable, so entries sharing a starting address keep their
    original relative order. Per-key comment data found in ``data.ca.items``
    is carried over to the returned map.

    A mapping with at most one entry is returned unchanged (same object);
    otherwise a new CommentedMap is returned and *data* is left untouched.
    """
    if len(data) <= 1:
        return data

    def starting_address_of(entry) -> int:
        # Empty / missing values (e.g. a key with nothing under it) have no address
        if not entry:
            return 0
        for address_key in address_sort_keys:
            if address_key not in entry:
                continue
            address = entry[address_key]
            if isinstance(address, (list, tuple)):
                # Stored as [start, end]; only the start decides the order
                return address[0] if address else 0
            return address
        return 0

    ordered_entries : list[Address_Sort_Entry] = [
        Address_Sort_Entry(key, data[key], starting_address_of(data[key]))
        for key in data
    ]
    ordered_entries.sort(key=lambda entry: entry.starting_address)

    ordered_map = CommentedMap()
    for ordered_entry in ordered_entries:
        ordered_map[ordered_entry.key] = ordered_entry.value
        # Carry over any comment data recorded for this key
        if ordered_entry.key in data.ca.items:
            ordered_map.ca.items[ordered_entry.key] = data.ca.items[ordered_entry.key]
    return ordered_map
#endregion
#region Symbol Gathering
def get_symbol_from_map_match(symbol_match: Match, address_offset: int)->SymbolInfo:
    """Build a SymbolInfo from a matched map line.

    Group 5 of the match is the symbol name, group 1 the hexadecimal start
    address (to which *address_offset* is added) and group 2 the hexadecimal
    size.
    """
    relative_start = int(symbol_match.group(1), 16)
    byte_size = int(symbol_match.group(2), 16)
    return SymbolInfo(symbol_match.group(5), relative_start + address_offset, byte_size)
def gather_symbols_for_section(address_offset: int, file_reader:TextIOWrapper, slice_info: SliceInfo, starting_match: Match):
    """Read the symbols that follow a section line and append them to *slice_info*.

    *starting_match* is the match of the section line itself; a SliceSection
    built from it is appended to ``slice_info.sections``. Lines are then read
    from *file_reader* for as long as they match the general symbol pattern and
    name the same translation unit (group 6) as the section line.

    Each symbol's end address is first derived from its size and then replaced
    by the start address of the following matching line, whichever translation
    unit that line belongs to. When the file ends or the following line does
    not match the pattern, the size-derived end address is kept.

    If a line of the same translation unit is itself a section entry (its name
    is a key of ``address_offset_map``), a new section is started from that
    line and gathering continues there.

    Note that the line which ends the section is consumed from *file_reader*.
    """
    section_tu_name = starting_match.group(6)
    section = SliceSection(get_symbol_from_map_match(starting_match, address_offset))
    slice_info.sections.append(section)

    # Skip the "entry of .data" marker lines that follow the section line
    line = file_reader.readline()
    while line and "entry of .data" in line:
        line = file_reader.readline()
    if not line:
        # End of file reached right after the section line
        return

    curr_match = general_symbol_pattern.match(line)
    while curr_match and curr_match.group(6) == section_tu_name:
        if curr_match.group(5) in address_offset_map:
            # Another section entry of the same TU: start a new section from
            # this line (not from the original section line again)
            gather_symbols_for_section(address_offset, file_reader, slice_info, curr_match)
            return

        # Add each symbol exactly once; its end address is refined below
        symbol = get_symbol_from_map_match(curr_match, address_offset)
        section.symbols.append(symbol)

        # Check the next line to get a more accurate ending address
        next_line = file_reader.readline()
        next_match = general_symbol_pattern.match(next_line) if next_line else None
        if not next_match:
            # End of file or non matching line: keep the size-derived end address
            return

        # Use the next entry's start address as the end boundary
        symbol.end_address = int(next_match.group(1), 16) + address_offset
        curr_match = next_match
def gather_tu_symbols(tu_name: str, map_path: str)->dict[str, SliceInfo]:
    """Gather the sections and symbols of one translation unit from a map file.

    *tu_name* is matched literally against the translation unit column of the
    map, so characters such as ``.`` carry no regex meaning. Every line of that
    translation unit whose symbol name is a key of ``address_offset_map``
    starts a new SliceInfo, which is filled by ``gather_symbols_for_section``
    using that section's address offset.

    Returns a dict from section name to SliceInfo. If the same section name is
    encountered again later in the file, the later SliceInfo replaces the
    earlier one. The dict is empty when the translation unit is not found.
    """
    gathered_symbols: dict[str, SliceInfo] = {}
    # Escape the name so it is matched as plain text inside the pattern
    tu_regex = re.compile(specific_tu_pattern_format.format(tu_name = re.escape(tu_name)))
    with open(map_path, "r", encoding="utf-8", newline="\n") as file_reader:
        # readline() is used throughout because the section gatherer advances
        # the same reader while it collects symbols
        for line in iter(file_reader.readline, ""):
            match = tu_regex.match(line)
            if not match:
                continue
            slice_name = match.group(5)
            if slice_name not in address_offset_map:
                # A regular symbol of the TU, not the start of a section
                continue
            slice_info = SliceInfo()
            gathered_symbols[slice_name] = slice_info
            gather_symbols_for_section(address_offset_map[slice_name], file_reader, slice_info, match)
    return gathered_symbols
#endregion
#region Asset Slices Config File
def update_asset_slice_config(tu_name: str, binary_slice_file_path: str, asset_slice_file_path: str, symbols_for_tu: dict[str, SliceInfo]):
    """Interactively add the ``.data`` symbols of a translation unit to the asset config.

    Does nothing when *symbols_for_tu* has no ``.data`` entry or when the user
    declines the first prompt. Otherwise the asset config file is loaded, the
    user is asked per symbol whether to add it and (optionally) which asset
    type it has, and the file is written back with the entries of the affected
    top-level key sorted by starting address.

    The top-level key is ``config/rel.yml`` when the file name of
    *binary_slice_file_path* contains ``rel`` and ``config/dol.yml`` otherwise.
    Only the file name is inspected so that a directory such as ``release/``
    in the path does not influence the choice.

    *tu_name* is accepted for symmetry with ``update_binary_slice_config`` and
    is not used.
    """
    if ".data" not in symbols_for_tu:
        return

    def confirmed(prompt: str) -> bool:
        # Anything other than "y"/"yes" (case-insensitive) counts as a no
        print(prompt)
        return input().lower() in ("y", "yes")

    if not confirmed("Add data entries to: " + asset_slice_file_path + "? (y/n)"):
        return

    yaml = YAML(typ="rt")
    with open(asset_slice_file_path, "r", encoding="utf-8", newline="\n") as file_reader:
        data: CommentedMap = yaml.load(file_reader)

    if "rel" in os.path.basename(binary_slice_file_path):
        binary_commented_map_key = "config/rel.yml"
    else:
        binary_commented_map_key = "config/dol.yml"
    binary_commented_map: CommentedMap = data[binary_commented_map_key]

    for section in symbols_for_tu[".data"].sections:
        for asset_symbol in section.symbols:
            symbol_name = asset_symbol.symbol_name
            if symbol_name is None:
                # "entry of .data" marker lines carry no symbol name; nothing to add
                continue
            if not confirmed("Add entry for: " + symbol_name + "? (y/n)"):
                continue
            print("What is the asset type? (optional)")
            asset_type = input()

            if symbol_name in binary_commented_map:
                asset_commented_map: CommentedMap = binary_commented_map[symbol_name]
            else:
                asset_commented_map = CommentedMap()
                binary_commented_map.insert(len(binary_commented_map), symbol_name, asset_commented_map)
                # NOTE(review): kept from the original code; confirm that the
                # comment object of a CommentedMap provides insert()
                binary_commented_map.ca.insert(symbol_name, symbol_name)

            # Add in the address range
            if "addrs" in asset_commented_map:
                # Re-use the same commented section
                address_commented_seq: CommentedSeq = asset_commented_map["addrs"]
                address_commented_seq.clear()
            else:
                address_commented_seq = CommentedSeq()
                asset_commented_map["addrs"] = address_commented_seq
            start_address, end_address = asset_symbol.get_address_range()
            address_commented_seq.fa.set_flow_style()
            address_commented_seq.append(scalarint.HexCapsInt(start_address))
            address_commented_seq.append(scalarint.HexCapsInt(end_address))

            # Add in the asset type
            if asset_type:
                asset_commented_map["type"] = asset_type
            elif "type" in asset_commented_map:
                # Re-using an entry that had a type, but none was given now: drop it
                del asset_commented_map["type"]

    # Sort by starting address and replace
    data[binary_commented_map_key] = sort_by_starting_address(binary_commented_map, ["addrs"])

    # Write out to file
    with open(asset_slice_file_path, "w", encoding="utf-8", newline="\n") as file_writer:
        yaml.dump(data, file_writer)
#endregion
#region Slice Config File
def update_binary_slice_config(tu_name: str, slice_file_path: str, symbols_for_tu: dict[str, SliceInfo]):
    """Write the section boundaries of a translation unit into the slices config.

    The entry is keyed by *tu_name* with its trailing ``.o`` replaced by
    ``.c``. When such an entry already exists the user is asked whether to
    overwrite it; declining leaves the file untouched. For every gathered
    slice that has at least one section, the ``[start, end]`` pair from
    ``SliceInfo.get_address_range`` is stored as upper-case hexadecimal in flow
    style. All top-level entries are then sorted by starting address and the
    file is rewritten.

    When nothing was gathered for the translation unit, a message is printed
    and the file is left untouched rather than gaining an empty entry.
    """
    if not any(slice_info.sections for slice_info in symbols_for_tu.values()):
        print("No sections found for " + tu_name + ". Nothing to update.")
        return

    yaml = YAML(typ="rt")
    yaml.indent(mapping=4, sequence=4, offset=4)
    with open(slice_file_path, "r", encoding="utf-8", newline="\n") as file_reader:
        data: CommentedMap = yaml.load(file_reader)

    # Only swap the extension; a ".o" elsewhere in the name must stay as it is
    tu_c_file_name = tu_name[:-2] + ".c" if tu_name.endswith(".o") else tu_name

    if tu_c_file_name in data:
        print("TU already exists in file. Overwrite values? (y/n)")
        reply = input().lower()
        if reply not in ("y", "yes"):
            return
        # Re-use the existing commented map
        slice_commented_map: CommentedMap = data[tu_c_file_name]
    else:
        # Create a new commented map and add it to the end of the file
        slice_commented_map = CommentedMap()
        data.insert(len(data), tu_c_file_name, slice_commented_map)

    for slice_name, slice_info in symbols_for_tu.items():
        if not slice_info.sections:
            # No symbols for this slice of the TU
            continue

        if slice_name in slice_commented_map:
            # Re-use the same commented section
            address_commented_seq: CommentedSeq = slice_commented_map[slice_name]
            address_commented_seq.clear()
        else:
            address_commented_seq = CommentedSeq()
            slice_commented_map[slice_name] = address_commented_seq

        # Add in the start and end address
        start_address, end_address = slice_info.get_address_range()
        address_commented_seq.fa.set_flow_style()
        address_commented_seq.append(scalarint.HexCapsInt(start_address))
        address_commented_seq.append(scalarint.HexCapsInt(end_address))

    # Sort by address
    data = sort_by_starting_address(data, prioritized_addresses)

    # Write out to file
    with open(slice_file_path, "w", encoding="utf-8", newline="\n") as file_writer:
        yaml.dump(data, file_writer)
#endregion
#region Main
def main():
    """Parse the command line, gather the TU's symbols and update both config files.

    The translation unit name may be given as the positional argument; when it
    is omitted the user is prompted for it. ``.o`` is appended to the name when
    it is missing. Paths that are not supplied fall back to the module-level
    defaults.
    """
    parser = argparse.ArgumentParser(prog="Translation Unit Config Updater", description="Adds the corresponding addresses to slice config files")
    parser.add_argument("tu_name", nargs="?", help="Name of the translation unit to get addresses for")
    parser.add_argument("-map", "--symbol-map", dest="symbol_map", help="Path to the symbol map file used for reference", action="store")
    parser.add_argument("-binary", "--binary-slices-file", dest="binary_slices_file", help="Path to the binary slices file to write to", action="store")
    parser.add_argument("-asset", "--asset-slices-file", dest="asset_slices_file", help="Path to the asset slices file to write to", action="store")
    args = parser.parse_args()

    # The positional argument is optional, so ask for the name when it was not given
    tu_name = args.tu_name
    if not tu_name:
        print("Enter the name of the translation unit to add:")
        tu_name = input().strip()
    if not tu_name:
        parser.error("a translation unit name is required")

    # Make sure the translation unit name ends with .o
    if not tu_name.endswith(".o"):
        tu_name = tu_name + ".o"

    symbol_map_path = args.symbol_map or default_map_path
    binary_slices_file = args.binary_slices_file or default_binary_slice_file_path
    asset_slices_file = args.asset_slices_file or default_asset_slice_file_path

    # Get the symbols for the TU
    symbols_for_tu = gather_tu_symbols(tu_name, symbol_map_path)

    # Update the binary slices file first, then offer to add the data assets
    update_binary_slice_config(tu_name, binary_slices_file, symbols_for_tu)
    update_asset_slice_config(tu_name, binary_slices_file, asset_slices_file, symbols_for_tu)


if __name__ == "__main__":
    main()
#endregion