Add support for M1 assembly, the hex0/1/2 family, and kaem files (#1295)

This commit is contained in:
gtker 2025-11-18 21:28:04 +01:00 committed by GitHub
parent 21f3da2c38
commit 179155fe73
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 1204 additions and 0 deletions

View File

@ -423,6 +423,9 @@ Haskell
Haxe
Hcl
Hex
Hex0
Hex1
Hex2
HiCAD
hledger
Hlsl
@ -444,6 +447,7 @@ Julia
Julius
Just
KakouneScript
KaemFile
Kotlin
Lean
Less
@ -455,6 +459,7 @@ LLVM
Logtalk
Lua
Lucius
M1Assembly
Madlang
Max
Makefile

View File

@ -748,6 +748,18 @@
"blank": true,
"extensions": ["hex"]
},
"Hex0": {
"extensions": ["hex0"],
"line_comment": ["#", ";"]
},
"Hex1": {
"extensions": ["hex1"],
"line_comment": ["#", ";"]
},
"Hex2": {
"extensions": ["hex2"],
"line_comment": ["#", ";"]
},
"HiCad": {
"name": "HICAD",
"line_comment": ["REM", "rem"],
@ -924,6 +936,11 @@
"quotes": [["\\\"", "\\\""], ["'", "'"]],
"extensions": ["kak"]
},
"Kaem": {
"name": "Kaem",
"line_comment": ["#"],
"extensions": ["kaem"]
},
"Kotlin": {
"line_comment": ["//"],
"multi_line_comments": [["/*", "*/"]],
@ -1042,6 +1059,12 @@
"quotes": [["\\\"", "\\\""], ["'", "'"]],
"extensions": ["lucius"]
},
"M1Assembly": {
"name": "M1 Assembly",
"extensions": ["m1"],
"line_comment": ["#", ";"],
"quotes": [["\\\"", "\\\""]]
},
"M4": {
"extensions": ["m4"],
"line_comment": ["#", "dnl"],

154
tests/data/hex0.hex0 Normal file
View File

@ -0,0 +1,154 @@
# 154 lines 89 code 34 comments 31 blanks
# SPDX-FileCopyrightText: 2017 Jeremiah Orians <jeremiah@pdp10.guru>
# SPDX-FileCopyrightText: 2023 Andrius Štikonas <andrius@stikonas.eu>
#
# SPDX-License-Identifier: GPL-3.0-or-later
## ELF Header
#:ELF_base
7F 45 4C 46 ## e_ident[EI_MAG0-3] ELF's magic number
02 ## e_ident[EI_CLASS] Indicating 64 bit
01 ## e_ident[EI_DATA] Indicating little endianness
01 ## e_ident[EI_VERSION] Indicating original elf
03 ## e_ident[EI_OSABI] Set at 3 because FreeBSD is strict
00 ## e_ident[EI_ABIVERSION] Set at 0 because none cares
00 00 00 00 00 00 00 ## e_ident[EI_PAD]
02 00 ## e_type Indicating Executable
3E 00 ## e_machine Indicating AMD64
01 00 00 00 ## e_version Indicating original elf
78 00 60 00 00 00 00 00 ## e_entry Address of the entry point (Number of bytes this header is + Base Address)
40 00 00 00 00 00 00 00 ## e_phoff Address of program header table
00 00 00 00 00 00 00 00 ## e_shoff Address of section header table
00 00 00 00 ## e_flags
40 00 ## e_ehsize Indicating our 64 Byte header
38 00 ## e_phentsize size of a program header table
01 00 ## e_phnum number of entries in program table
00 00 ## e_shentsize size of a section header table
00 00 ## e_shnum number of entries in section table
00 00 ## e_shstrndx index of the section names
## Program Header
#:ELF_program_headers
01 00 00 00 ## p_type
01 00 00 00 ## p_flags: PF-X = 1
00 00 00 00 00 00 00 00 ## p_offset
00 00 60 00 00 00 00 00 ## p_vaddr
00 00 60 00 00 00 00 00 ## p_physaddr
E5 00 00 00 00 00 00 00 ## p_filesz
E5 00 00 00 00 00 00 00 ## p_memsz
01 00 00 00 00 00 00 00 ## Required alignment
#:ELF_text
# Where the ELF Header is going to hit
# Simply jump to _start
# Our main function
#:_start (0x600078)
58 ; pop_rax # Get the number of arguments
5F ; pop_rdi # Get the program name
5F ; pop_rdi # Get the actual input name
31F6 ; xor_esi,esi # prepare read_only, rsi = 0
6A 02 ; push !2 # prepare syscall number
58 ; pop_rax # the syscall number for open()
99 ; cdq # Extra sure, rdx = 0
0F05 ; syscall # Now open that damn file
5F ; pop_rdi # Get the actual output name
50 ; push_rax # Preserve the file pointer we were given
66BE 4102 ; mov_si, @577 # Prepare file as O_WRONLY|O_CREAT|O_TRUNC
66BA C001 ; mov_dx, @448 # Prepare file as RWX for owner only (700 in octal)
6A 02 ; push !2 # prepare syscall number
58 ; pop_rax # the syscall number for open()
0F05 ; syscall # Now open that damn file
93 ; xchg_ebx,eax # Preserve the file pointer we were given
99 ; cdq # rdx = 0 since file descriptor is nonnegative
FFC2 ; inc_edx # rdx = 1 (count for read/write)
#:loop_reset_all (0x600096)
31ED ; xor_ebp,ebp # ebp = 0 (no prior hex val)
# Comment tracking is done with ecx.
# ecx is decremented if we hit a
# comment (';' or '#') and reset
# if we hit a new-line.
#:loop_reset_comment (0x600098)
52 ; push_rdx
59 ; pop_rcx # Set no current comment
#:loop_add_comment (0x60009A)
FFC9 ; dec_ecx
#:loop (0x60009C)
# Read a byte
5F ; pop_rdi # Get infile
54 ; push_rsp
5E ; pop_rsi # Set buffer
# rdx is already set to 1.
31C0 ; xor_eax,eax # Set read syscall in rax
51 ; push_rcx # Save comment tracking
0F05 ; syscall # Do the actual read
59 ; pop_rcx # Restore comment tracking
57 ; push_rdi # Re-save infile
85C0 ; test_eax,eax # Check what we got
75 06 ; jne !cont # No EOF
# Exit successfully
B0 3C ; mov_al, !60 # Set exit syscall in rax
31FF ; xor_edi,edi # Set return success (rdi = 0)
0F05 ; syscall # Exit
#:cont (0x6000B0)
8A06 ; mov_al,[rsi] # Load the byte just read into al
# New line check
3C 0A ; cmp_al, !10 # Check new-line
74 E2 ; je !loop_reset_comment # If new-line, end comment handling
# In comment check
85C9 ; test_ecx,ecx # Skip byte if we are in a comment
75 E2 ; jne !loop # ecx != 0 -> still inside a comment
# Start comment check
3C 23 ; cmp_al, !35 # Start of '#' comment
74 DC ; je !loop_add_comment
3C 3B ; cmp_al, !59 # Start of ';' comment
74 D8 ; je !loop_add_comment
# Start of hex str to int
2C 30 ; sub_al, !48 # Subtract ascii '0' from al
2C 0A ; sub_al, !10 # Borrows (CF set) only for '0'-'9'
72 08 ; jb !write # We have hex value, write it
2C 07 ; sub_al, !7 # al is now char - 'A' (48 + 10 + 7 = 65)
24 DF ; and_al, !0xDF # Remove lower case bit
3C 07 ; cmp_al, !7 # Check for value 'A'-'F' (NOTE(review): bound 7 also lets 'G'/'g' through)
73 CC ; jae !loop # Not a hex digit, skip it and read the next byte
#:write (0x6000D0)
C1E5 04 ; shl_ebp, !4 # Shift up existing hex digit
04 0A ; add_al, !10 # Finish converting ascii to raw value
01C5 ; add_ebp,eax # Combine the hex digits
# Check if this is first digit in hex val
F7DB ; neg_ebx # Flip sign of ebx (outfile fd) to indicate we got a digit
7C C1 ; jl !loop # Negative -> first digit, get another one
# We have both digits in low byte of ebp, good to write
892E ; mov_[rsi],ebp # Move assembled byte (in ebp) to buffer
89DF ; mov_edi,ebx # Move outfile to rdi (ebx is positive again here)
B0 01 ; mov_al, !1 # Set write syscall in rax
0F05 ; syscall # Do the write
EB B1 ; jmp !loop_reset_all # Start a fresh byte
#:ELF_end (0x6000E5)

589
tests/data/hex1.hex1 Normal file
View File

@ -0,0 +1,589 @@
# 589 lines 387 code 91 comments 111 blanks
# SPDX-FileCopyrightText: 2016 Jeremiah Orians <jeremiah@pdp10.guru>
# SPDX-FileCopyrightText: 2017 Jan Nieuwenhuizen <janneke@gnu.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later
## ELF Header
# :ELF_base
7F 45 4C 46 ## e_ident[EI_MAG0-3] ELF's magic number
02 ## e_ident[EI_CLASS] Indicating 64 bit
01 ## e_ident[EI_DATA] Indicating little endianness
01 ## e_ident[EI_VERSION] Indicating original elf
03 ## e_ident[EI_OSABI] Set at 3 because FreeBSD is strict
00 ## e_ident[EI_ABIVERSION] Set at 0 because none cares
00 00 00 00 00 00 00 ## e_ident[EI_PAD]
02 00 ## e_type Indicating Executable
3E 00 ## e_machine Indicating AMD64
01 00 00 00 ## e_version Indicating original elf
78 00 60 00 00 00 00 00 ## e_entry Address of the entry point (Number of bytes this header is + Base Address)
40 00 00 00 00 00 00 00 ## e_phoff Address of program header table
00 00 00 00 00 00 00 00 ## e_shoff Address of section header table
00 00 00 00 ## e_flags
40 00 ## e_ehsize Indicating our 64 Byte header
38 00 ## e_phentsize size of a program header table
01 00 ## e_phnum number of entries in program table
00 00 ## e_shentsize size of a section header table
00 00 ## e_shnum number of entries in section table
00 00 ## e_shstrndx index of the section names
## Program Header
# :ELF_program_headers
01 00 00 00 ## p_type
07 00 00 00 ## ph_flags: PF-X|PF-W|PF-R = 7
00 00 00 00 00 00 00 00 ## p_offset
00 00 60 00 00 00 00 00 ## p_vaddr
00 00 60 00 00 00 00 00 ## p_physaddr
EF 05 00 00 00 00 00 00 ## p_filesz
EF 05 00 00 00 00 00 00 ## p_memsz
01 00 00 00 00 00 00 00 ## Required alignment
# :ELF_text
# Where the ELF Header is going to hit
# Simply jump to _start
# Our main function
# Register usage:
# RAX, RDX, RSI, RDI => Temps
# R15 => Flag
# R14 => High bits
# R13 => IP
# R12 => MALLOC
# R11 => HEAD
# Struct format: (size 24)
# NEXT => 0
# TARGET => 8
# NAME => 16
# :_start
48C7C7 00000000 ; mov_rdi, %0 # Get current pointer
E8 %w ; call %malloc # Get current HEAP
4889C7 ; mov_rdi,rax # Using current
4989C4 ; mov_r12,rax # Setup MALLOC
4881C7 00008000 ; add_rdi, %8388608 # Create space for temp [8MB]
E8 %w ; call %malloc # Extend the heap by the 8388608 bytes added above
4C8925 %T ; mov_[rip+DWORD],r12 %scratch # Scratch area starts at the old heap top
4981C4 00080000 ; add_r12, %0x800 # 2 KiB of scratch
58 ; pop_rax # Get the number of arguments
5F ; pop_rdi # Get the program name
5F ; pop_rdi # Get the actual input name
48C7C6 00000000 ; mov_rsi, %0 # prepare read_only
48C7C0 02000000 ; mov_rax, %2 # the syscall number for open()
0F05 ; syscall # Now open that damn file
4989C1 ; mov_r9,rax # Preserve the input file descriptor
5F ; pop_rdi # Get the actual output name
48C7C6 41020000 ; mov_rsi, %577 # Prepare file as O_WRONLY|O_CREAT|O_TRUNC
48C7C2 C0010000 ; mov_rdx, %448 # Prepare file as RWX for owner only (700 in octal)
48C7C0 02000000 ; mov_rax, %2 # the syscall number for open()
0F05 ; syscall # Now open that damn file
4883F8 00 ; cmp_rax, !0 # Check for missing output
0F8F %R ; jg %_start_out # Have real output
48C7C0 01000000 ; mov_rax, %1 # Use stdout
:R # :_start_out
4989C2 ; mov_r10,rax # Preserve the output file descriptor
E8 %H ; call %ClearScratch # Zero scratch
49C7C7 FFFFFFFF ; mov_r15, %-1 # Our flag for byte processing
49C7C6 00000000 ; mov_r14, %0 # temp storage for the sum
49C7C5 00006000 ; mov_r13, %0x00600000 # Our starting IP
49C7C3 00000000 ; mov_r11, %0 # HEAD = NULL
E8 %a ; call %First_pass # Process it
# rewind input file
4C89CF ; mov_rdi,r9 # Using our input file
48C7C6 00000000 ; mov_rsi, %0 # Offset Zero
48C7C2 00000000 ; mov_rdx, %0 # Whence Zero
48C7C0 08000000 ; mov_rax, %8 # lseek
4153 ; push_r11 # Protect HEAD (syscall clobbers r11)
0F05 ; syscall
415B ; pop_r11 # Restore HEAD
49C7C7 FFFFFFFF ; mov_r15, %-1 # Our flag for byte processing
49C7C6 00000000 ; mov_r14, %0 # temp storage for the sum
49C7C5 00006000 ; mov_r13, %0x00600000 # Our starting IP
E8 %k ; call %Second_pass # Process it
E9 %v ; jmp %Done
:a # :First_pass
E8 %x ; call %Read_byte
# Deal with EOF
4883F8 FC ; cmp_rax, !-4
0F84 %i ; je %First_pass_done
# Check for :
4883F8 3A ; cmp_rax, !0x3A
0F85 %b ; jne %First_pass_0
# Deal with label
E9 %C ; jmp %StoreLabel
:b # :First_pass_0
# Check for !
4883F8 21 ; cmp_rax, !0x21
0F84 %h ; je %First_pass_pointer
# Check for @
4883F8 40 ; cmp_rax, !0x40
0F84 %h ; je %First_pass_pointer
# Check for $
4883F8 24 ; cmp_rax, !0x24
0F84 %h ; je %First_pass_pointer
# Check for %
4883F8 25 ; cmp_rax, !0x25
0F84 %h ; je %First_pass_pointer
# Check for &
4883F8 26 ; cmp_rax, !0x26
0F84 %h ; je %First_pass_pointer
# Deal with everything else
E8 %j ; call %hex # Process our char
# Deal with EOF
4883F8 FC ; cmp_rax, !-4
0F84 %i ; je %First_pass_done
# deal with -1 values
4883F8 00 ; cmp_rax, !0
0F8C %a ; jl %First_pass
# deal with toggle
4983FF 00 ; cmp_r15, !0
0F84 %c ; je %First_pass_1
4983C5 01 ; add_r13, !1 # Increment IP
:c # :First_pass_1
49F7D7 ; not_r15
E9 %a ; jmp %First_pass
:d # :Update_Pointer
# Check for !
4883F8 21 ; cmp_rax, !0x21
0F84 %g ; je %Update_Pointer_1
# Check for @
4883F8 40 ; cmp_rax, !0x40
0F84 %f ; je %Update_Pointer_2
# Check for $
4883F8 24 ; cmp_rax, !0x24
0F84 %f ; je %Update_Pointer_2
# Check for %
4883F8 25 ; cmp_rax, !0x25
0F84 %e ; je %Update_Pointer_4
# Check for &
4883F8 26 ; cmp_rax, !0x26
0F84 %e ; je %Update_Pointer_4
# deal with bad input
E8 %Q # call %fail
:e # :Update_Pointer_4
4983C5 02 ; add_r13, !2 # Increment IP
:f # :Update_Pointer_2
4983C5 01 ; add_r13, !1 # Increment IP
:g # :Update_Pointer_1
4983C5 01 ; add_r13, !1 # Increment IP
C3 ; ret
:h # :First_pass_pointer
# Deal with Pointer to label
E8 %d ; call %Update_Pointer # Increment IP
488B1D %T ; mov_rbx,[rip+DWORD] %scratch # Using scratch
E8 %A ; call %consume_token # Read token
E8 %H ; call %ClearScratch # Throw away token
4883F8 3E ; cmp_rax, !0x3E # check for '>'
0F85 %a ; jne %First_pass # Loop again
# Deal with %label>label case
488B1D %T ; mov_rbx,[rip+DWORD] %scratch # Write to scratch
E8 %A ; call %consume_token # get token
E8 %H ; call %ClearScratch # Clean up after ourselves
E9 %a ; jmp %First_pass # Loop again
:i # :First_pass_done
C3 ; ret
:j # :hex
# deal with EOF
4883F8 FC ; cmp_rax, !-4
0F84 %n ; je %EOF
# deal with line comments starting with #
4883F8 23 ; cmp_rax, !0x23
0F84 %s ; je %ascii_comment
# deal with line comments starting with ;
4883F8 3B ; cmp_rax, !0x3B
0F84 %s ; je %ascii_comment
# deal all ascii less than 0
4883F8 30 ; cmp_rax, !0x30
0F8C %r ; jl %ascii_other
# deal with 0-9
4883F8 3A ; cmp_rax, !0x3A
0F8C %o ; jl %ascii_num
# deal with all ascii less than A
4883F8 41 ; cmp_rax, !0x41
0F8C %r ; jl %ascii_other
# deal with A-F
4883F8 47 ; cmp_rax, !0x47
0F8C %q ; jl %ascii_high
# deal with all ascii less than a
4883F8 61 ; cmp_rax, !0x61
0F8C %r ; jl %ascii_other
# deal with a-f
4883F8 67 ; cmp_rax, !0x67
0F8C %p ; jl %ascii_low
# The rest that remains needs to be ignored
E9 %r ; jmp %ascii_other
:k # :Second_pass
E8 %x ; call %Read_byte
# Deal with EOF
4883F8 FC ; cmp_rax, !-4
0F84 %m ; je %Second_pass_done
# Simply drop the label
4883F8 3A ; cmp_rax, !0x3A
0F85 %l ; jne %Second_pass_0
488B1D %T ; mov_rbx,[rip+DWORD] %scratch # Using scratch
E8 %A ; call %consume_token # Read token
E8 %H ; call %ClearScratch # Throw away token
E9 %k ; jmp %Second_pass
:l # :Second_pass_0
# Deal with % pointer
4883F8 25 ; cmp_rax, !0x25
0F84 %L ; je %StorePointer_rel4
# Deal with @ pointer
4883F8 40 ; cmp_rax, !0x40
0F84 %M ; je %StorePointer_rel2
# Deal with ! pointer
4883F8 21 ; cmp_rax, !0x21
0F84 %N ; je %StorePointer_rel1
# Deal with & pointer
4883F8 26 ; cmp_rax, !0x26
0F84 %O ; je %StorePointer_abs4
# Deal with $ pointer
4883F8 24 ; cmp_rax, !0x24
0F84 %P ; je %StorePointer_abs2
# :Second_pass_1
# Deal with everything else
E8 %j ; call %hex # Process our char
# Deal with EOF
4883F8 FC ; cmp_rax, !-4
0F84 %m ; je %Second_pass_done
# deal with -1 values
4883F8 00 ; cmp_rax, !0
0F8C %k ; jl %Second_pass
# deal with toggle
4983FF 00 ; cmp_r15, !0
0F84 %u ; je %print
# process first byte of pair
4989C6 ; mov_r14,rax
49C7C7 00000000 ; mov_r15, %0
E9 %k ; jmp %Second_pass
:m # :Second_pass_done
:n # :EOF
C3 ; ret
:o # :ascii_num
83E8 30 ; sub_eax, !0x30 # '0'-'9' -> 0-9 (32-bit op, no REX.W prefix)
C3 ; ret
:p # :ascii_low
83E8 57 ; sub_eax, !0x57 # 'a'-'f' -> 10-15
C3 ; ret
:q # :ascii_high
83E8 37 ; sub_eax, !0x37 # 'A'-'F' -> 10-15
C3 ; ret
:r # :ascii_other
48C7C0 FFFFFFFF ; mov_rax, %-1 # Not a hex digit: callers skip negative values
C3 ; ret
:s # :ascii_comment
E8 %x ; call %Read_byte
4883F8 0D ; cmp_rax, !0x0D
0F84 %t ; je %ascii_comment_cr
4883F8 0A ; cmp_rax, !0x0A
0F85 %s ; jne %ascii_comment
:t # :ascii_comment_cr
48C7C0 FFFFFFFF ; mov_rax, %-1
C3 ; ret
# process second byte of pair
:u # :print
# update the sum and store in output
49C1E6 04 ; shl_r14, !4
4C01F0 ; add_rax,r14
# flip the toggle
49F7D7 ; not_r15
# Print our first Hex
48C7C2 01000000 ; mov_rdx, %1 # set the size of chars we want
E8 %z ; call %print_chars
4983C5 01 ; add_r13, !1 # Increment IP
E9 %k ; jmp %Second_pass
:v # :Done
# program completed Successfully
48C7C7 00000000 ; mov_rdi, %0 # All is well
48C7C0 3C000000 ; mov_rax, %0x3C # put the exit syscall number in eax
0F05 ; syscall # Call it a good day
# Malloc isn't actually required if the program being built fits in the initial memory
# However, it doesn't take much to add it.
# Requires a value in RDI
:w # :malloc
48C7C0 0C000000 ; mov_rax, %12 # the Syscall # for SYS_BRK
4153 ; push_r11 # Protect r11
0F05 ; syscall # call the Kernel
415B ; pop_r11 # Restore r11
C3 ; ret
:x # :Read_byte
# Attempt to read 1 byte from the input file (descriptor in r9)
48C7C2 01000000 ; mov_rdx, %1 # set the size of chars we want
488D35 %S ; lea_rsi,[rip+DWORD] %write # Where to put it
4C89CF ; mov_rdi,r9 # Where are we reading from
48C7C0 00000000 ; mov_rax, %0 # the syscall number for read
4153 ; push_r11 # Protect r11 (syscall clobbers it)
0F05 ; syscall # call the Kernel
415B ; pop_r11 # Restore r11
4885C0 ; test_rax,rax # check what we got
0F84 %y ; je %Read_byte_1 # Read returned 0: EOF, call it done
# load byte
8A05 %S ; mov_al,[rip+DWORD] %write # load char
480FB6C0 ; movzx_rax,al # We have to zero extend it to use it
C3 ; ret
# Deal with EOF
:y # :Read_byte_1
48C7C0 FCFFFFFF ; mov_rax, %-4 # Put EOF marker (-4) in rax
C3 ; ret
:z # :print_chars
50 ; push_rax # Push the chars themselves onto the stack
4889E6 ; mov_rsi,rsp # What we are writing (rsp now points at them)
4C89D7 ; mov_rdi,r10 # Write to target file
48C7C0 01000000 ; mov_rax, %1 # the syscall number for write
4153 ; push_r11 # Protect HEAD (syscall clobbers r11)
0F05 ; syscall # call the Kernel; byte count in rdx was set by the caller
415B ; pop_r11 # Restore HEAD
58 ; pop_rax # deallocate stack (rax gets the pushed value back)
C3 ; ret
# Receives pointer in RBX
# Writes out char and updates RBX
:A # :consume_token
E8 %x ; call %Read_byte # Consume_token
# Check for \t
4883F8 09 ; cmp_rax, !0x09
0F84 %B ; je %consume_token_done
# Check for \n
4883F8 0A ; cmp_rax, !0x0A
0F84 %B ; je %consume_token_done
# Check for ' '
4883F8 20 ; cmp_rax, !0x20
0F84 %B ; je %consume_token_done
# Check for '>'
4883F8 3E ; cmp_rax, !0x3E
0F84 %B ; je %consume_token_done
# Looks like we are still reading token
8803 ; mov_[rbx],al # Store char
4883C3 01 ; add_rbx, !1 # Point to next spot
E9 %A ; jmp %consume_token # loop until done
:B # :consume_token_done
48C7C1 00000000 ; mov_rcx, %0 # Pad with nulls
48890B ; mov_[rbx],rcx
4883C3 08 ; add_rbx, !8
C3 ; ret
:C # :StoreLabel
4C89E0 ; mov_rax,r12 # ENTRY
4981C4 18000000 ; add_r12, %24 # CALLOC
4C8968 08 ; mov_[rax+BYTE],r13 !8 # ENTRY->TARGET = IP
4C8918 ; mov_[rax],r11 # ENTRY->NEXT = JUMP_TABLE
4989C3 ; mov_r11,rax # JUMP_TABLE = ENTRY
4D8963 10 ; mov_[r11+BYTE],r12 !16 # ENTRY->NAME = TOKEN
4C89E3 ; mov_rbx,r12 # Write Starting after struct
E8 %A ; call %consume_token # Collect whole string
4989DC ; mov_r12,rbx # Update HEAP
E9 %a ; jmp %First_pass
:D # :GetTarget
488B3D %T ; mov_rdi,[rip+DWORD] %scratch # Reset scratch
4C89D9 ; mov_rcx,r11 # Grab JUMP_TABLE
488B71 10 ; mov_rsi,[rcx+BYTE] !16 # I->NAME
:E # :GetTarget_loop
8A06 ; mov_al,[rsi] # I->NAME[0]
8A1F ; mov_bl,[rdi] # scratch[0]
480FB6DB ; movzx_rbx,bl # Zero extend
480FB6C0 ; movzx_rax,al # Zero extend
38D8 ; cmp_al,bl # IF TOKEN == I->NAME
0F85 %F ; jne %GetTarget_miss # Oops
4883C6 01 ; add_rsi, !1
4881C7 01000000 ; add_rdi, %1
3C 00 ; cmp_al, !0
0F85 %E ; jne %GetTarget_loop # Loop until
E9 %G ; jmp %GetTarget_done # Match
# Miss
:F # :GetTarget_miss
488B09 ; mov_rcx,[rcx] # I = I->NEXT
4883F9 00 ; cmp_rcx, !0 # IF NULL == I
0F84 %Q ; je %fail # Abort hard
488B71 10 ; mov_rsi,[rcx+BYTE] !16 # I->NAME
488B3D %T ; mov_rdi,[rip+DWORD] %scratch # Reset scratch
E9 %E ; jmp %GetTarget_loop
:G # :GetTarget_done
488B41 08 ; mov_rax,[rcx+BYTE] !8 # Get address
C3 ; ret
:H # :ClearScratch
50 ; push_rax # Protect against changes
53 ; push_rbx # And overwrites
51 ; push_rcx # While we work
488B1D %T ; mov_rbx,[rip+DWORD] %scratch # Where our scratch is
48C7C0 00000000 ; mov_rax, %0 # Using null
:I # :ClearScratch_loop
488B0B ; mov_rcx,[rbx] # Get current value
8803 ; mov_[rbx],al # Because we want null
4883C3 01 ; add_rbx, !1 # Increment
4883F9 00 ; cmp_rcx, !0 # Check if we hit null
0F85 %I ; jne %ClearScratch_loop # Keep looping
59 ; pop_rcx # Don't Forget to
5B ; pop_rbx # Restore Damage
58 ; pop_rax # Entirely
C3 ; ret
:J # :StorePointer
E8 %d ; call %Update_Pointer # Increment IP
488B1D %T ; mov_rbx,[rip+DWORD] %scratch # Write to scratch
E8 %A ; call %consume_token # get token
50 ; push_rax # Protect base_sep_p
488B05 %T ; mov_rax,[rip+DWORD] %scratch # Pointer to scratch
E8 %D ; call %GetTarget # Get address of pointer
E8 %H ; call %ClearScratch # Clean up after ourselves
4C89EA ; mov_rdx,r13 # base = IP
5B ; pop_rbx # Restore base_sep_p
4883FB 3E ; cmp_rbx, !0x3E # If base_sep_p == '>'
0F85 %K ; jne %StorePointer_done # If not
# Deal with %label>label case
50 ; push_rax # We need to preserve main target
488B1D %T ; mov_rbx,[rip+DWORD] %scratch # Write to scratch
E8 %A ; call %consume_token # get token
488B05 %T ; mov_rax,[rip+DWORD] %scratch # Pointer to scratch
E8 %D ; call %GetTarget # Get address of pointer
E8 %H ; call %ClearScratch # Clean up after ourselves
4889C2 ; mov_rdx,rax # Use our new base
58 ; pop_rax # Restore main target
:K # :StorePointer_done
C3 ; ret
:L # :StorePointer_rel4
E8 %J ; call %StorePointer # Do Common
4829D0 ; sub_rax,rdx # target - ip
48C7C2 04000000 ; mov_rdx, %4 # set the size of chars we want
E8 %z ; call %print_chars
E8 %H ; call %ClearScratch # Clean up after ourselves
E9 %k ; jmp %Second_pass
:M # :StorePointer_rel2
E8 %J ; call %StorePointer # Do Common
4829D0 ; sub_rax,rdx # target - ip
48C7C2 02000000 ; mov_rdx, %2 # set the size of chars we want
E8 %z ; call %print_chars
E8 %H ; call %ClearScratch # Clean up after ourselves
E9 %k ; jmp %Second_pass
:N # :StorePointer_rel1
E8 %J ; call %StorePointer # Do Common
4829D0 ; sub_rax,rdx # target - ip
48C7C2 01000000 ; mov_rdx, %1 # set the size of chars we want
E8 %z ; call %print_chars
E8 %H ; call %ClearScratch # Clean up after ourselves
E9 %k ; jmp %Second_pass
:O # :StorePointer_abs4
E8 %J ; call %StorePointer # Do Common
48C7C2 04000000 ; mov_rdx, %4 # set the size of chars we want
E8 %z ; call %print_chars
E8 %H ; call %ClearScratch # Clean up after ourselves
E9 %k ; jmp %Second_pass
:P # :StorePointer_abs2
E8 %J ; call %StorePointer # Do Common
48C7C2 02000000 ; mov_rdx, %2 # set the size of chars we want
E8 %z ; call %print_chars
E8 %H ; call %ClearScratch # Clean up after ourselves
E9 %k ; jmp %Second_pass
:Q # :fail
# Some shit went wrong
48C7C7 01000000 ; mov_rdi, %1 # All is wrong
48C7C0 3C000000 ; mov_rax, %0x3C # put the exit syscall number in eax
0F05 ; syscall # Call it a good day
:S # :write
00000000 ; NULL
00000000 ; NULL
:T # :scratch
00000000 ; NULL
00000000 ; NULL
# :ELF_end

110
tests/data/hex2.hex2 Normal file
View File

@ -0,0 +1,110 @@
# 110 lines 78 code 7 comments 25 blanks
# SPDX-FileCopyrightText: 2019 Jeremiah Orians <jeremiah@pdp10.guru>
#
# SPDX-License-Identifier: GPL-3.0-or-later
## ELF Header
:ELF_base
7F 45 4C 46 ## e_ident[EI_MAG0-3] ELF's magic number
02 ## e_ident[EI_CLASS] Indicating 64 bit
01 ## e_ident[EI_DATA] Indicating little endianness
01 ## e_ident[EI_VERSION] Indicating original elf
03 ## e_ident[EI_OSABI] Set at 3 because FreeBSD is strict
00 ## e_ident[EI_ABIVERSION] Set at 0 because none cares
00 00 00 00 00 00 00 ## e_ident[EI_PAD]
02 00 ## e_type Indicating Executable
3E 00 ## e_machine Indicating AMD64
01 00 00 00 ## e_version Indicating original elf
&_start 00 00 00 00 ## e_entry Address of the entry point (Number of bytes this header is + Base Address)
%ELF_program_headers>ELF_base 00 00 00 00 ## e_phoff Address of program header table
00 00 00 00 00 00 00 00 ## e_shoff Address of section header table
00 00 00 00 ## e_flags
40 00 ## e_ehsize Indicating our 64 Byte header
38 00 ## e_phentsize size of a program header table
01 00 ## e_phnum number of entries in program table
00 00 ## e_shentsize size of a section header table
00 00 ## e_shnum number of entries in section table
00 00 ## e_shstrndx index of the section names
## Program Header
:ELF_program_headers
01 00 00 00 ## p_type
07 00 00 00 ## ph_flags: PF-X|PF-W|PF-R = 7
00 00 00 00 00 00 00 00 ## p_offset
&ELF_base 00 00 00 00 ## p_vaddr
&ELF_base 00 00 00 00 ## p_physaddr
%ELF_end>ELF_base 00 00 00 00 ## p_filesz
%ELF_end>ELF_base 00 00 00 00 ## p_memsz
01 00 00 00 00 00 00 00 ## Required alignment
:ELF_text
:_start
58 ; pop_rax # Get the number of arguments
5F ; pop_rdi # Get the program name
5F ; pop_rdi # Get the actual output name
48C7C6 41020000 ; mov_rsi, %577 # Prepare file as O_WRONLY|O_CREAT|O_TRUNC
48C7C2 80010000 ; mov_rdx, %384 # Prepare file as RW for owner only (600 in octal)
48C7C0 02000000 ; mov_rax, %2 # the syscall number for open()
0F05 ; syscall # Now open that file
4989C7 ; mov_r15,rax # Preserve the output file descriptor
48C7C0 0C000000 ; mov_rax, %12 # the Syscall # for SYS_BRK
48C7C7 00000000 ; mov_rdi, %0 # Get current brk
0F05 ; syscall # Let the kernel do the work
4989C6 ; mov_r14,rax # Set our malloc pointer
48C7C0 0C000000 ; mov_rax, %12 # the Syscall # for SYS_BRK
4C89F7 ; mov_rdi,r14 # Using current pointer
4881C7 00001000 ; add_rdi, %0x100000 # Allocate 1MB
0F05 ; syscall # Let the kernel do the work
:core
5F ; pop_rdi # Get the actual input name
4883FF 00 ; cmp_rdi, !0 # Check for null string
0F84 %done ; je %done # Hit null be done
48C7C6 00000000 ; mov_rsi, %0 # prepare read_only
48C7C2 00000000 ; mov_rdx, %0 # prevent any interactions
48C7C0 02000000 ; mov_rax, %2 # the syscall number for open()
0F05 ; syscall # Now open that damn file
4989C5 ; mov_r13,rax # Protect INPUT
:keep
48C7C2 00001000 ; mov_rdx, %0x100000 # set the size of chars we want
4C89F6 ; mov_rsi,r14 # Where to put it
4C89EF ; mov_rdi,r13 # Where are we reading from
48C7C0 00000000 ; mov_rax, %0 # the syscall number for read
0F05 ; syscall # call the Kernel
50 ; push_rax # Protect the number of bytes read
4889C2 ; mov_rdx,rax # Number of bytes to write
4C89F6 ; mov_rsi,r14 # What we are writing
4C89FF ; mov_rdi,r15 # Write to target file
48C7C0 01000000 ; mov_rax, %1 # the syscall number for write
0F05 ; syscall # call the Kernel
58 ; pop_rax # Get bytes read
483D 00001000 ; cmp_rax, %0x100000 # Check if buffer was fully used
0F84 %keep ; je %keep # Keep looping if was full
E9 %core ; jmp %core # Otherwise move to next file
:done
# program completed Successfully
48C7C7 00000000 ; mov_rdi, %0 # All is well
48C7C0 3C000000 ; mov_rax, %0x3C # put the exit syscall number in eax
0F05 ; syscall # Call it a good day
:ELF_end

43
tests/data/kaem.kaem Normal file
View File

@ -0,0 +1,43 @@
# 43 lines 2 code 33 comments 8 blanks
#! /usr/bin/env bash
# Mes --- Maxwell Equations of Software
# Copyright © 2017 Jan Nieuwenhuizen <janneke@gnu.org>
# Copyright © 2017 Jeremiah Orians
#
# This file is part of Mes.
#
# Mes is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or (at
# your option) any later version.
#
# Mes is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Mes. If not, see <http://www.gnu.org/licenses/>.
# Can also be run by kaem or any other shell of your personal choice
# To run in kaem simply: kaem --verbose --strict
# Warning: all binaries prior to the use of blood-elf will not be readable by
# objdump; you may need to use ndisasm or gdb to view the assembly in the binary.
###############################################
# Phase-0 Build hex0 from bootstrapped binary #
###############################################
./bootstrap-seeds/POSIX/AMD64/hex0-seed ./AMD64/hex0_AMD64.hex0 ./AMD64/artifact/hex0
# hex0 should have the exact same checksum as hex0-seed as they are both supposed
# to be built from hex0_amd64.hex0 and by definition must be identical
#########################################
# Phase-0b Build minimal kaem from hex0 #
#########################################
./AMD64/artifact/hex0 ./AMD64/kaem-minimal.hex0 ./AMD64/artifact/kaem-0
# for checksum validation reasons

280
tests/data/m1.m1 Normal file
View File

@ -0,0 +1,280 @@
# 280 lines 197 code 47 comments 36 blanks
## Copyright (C) 2016 Jeremiah Orians
## This file is part of stage0.
##
## stage0 is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## stage0 is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with stage0. If not, see <http://www.gnu.org/licenses/>.
# M2-Planet standards
DEFINE NULL 00000000
# Registers
DEFINE R0 0
DEFINE R1 1
DEFINE R2 2
DEFINE R3 3
DEFINE R4 4
DEFINE R5 5
DEFINE R6 6
DEFINE R7 7
DEFINE R8 8
DEFINE R9 9
DEFINE R10 A
DEFINE R11 B
DEFINE R12 C
DEFINE BP C
DEFINE R13 D
DEFINE SP D
DEFINE R14 E
DEFINE LR E
DEFINE R15 F
DEFINE PC F
# Register masks for push/pop16
DEFINE {R0} 0100
DEFINE {R1} 0200
DEFINE {R2} 0400
DEFINE {R3} 0800
DEFINE {R4} 1000
DEFINE {R8} 0001
DEFINE {R9} 0002
DEFINE {R10} 0004
DEFINE {R11} 0008
DEFINE {BP} 0010
DEFINE {LR} 0040
# Bitshift constants
DEFINE NO_SHIFT 0
DEFINE LEFT 1
DEFINE RIGHT 3
DEFINE ARITH_RIGHT 5
# LOAD/STORE (nibble fragments; a full instruction must concatenate to 8 hex digits)
DEFINE HALF_MEMORY E1
DEFINE MEMORY E5
DEFINE NO_OFFSET B0
DEFINE STORE32 08
DEFINE STORE16 0C
DEFINE STORE8 0C
DEFINE LOAD32 09
DEFINE LOADU8 0D ; two nibbles like LOAD32, so "!imm Rd LOADU8 Rn MEMORY" stays 8 digits (LDRB)
DEFINE LOADS8 D0
DEFINE LOADS16 F0
DEFINE LOAD 0D
DEFINE LOADI8_ALWAYS 0A0E3
DEFINE LOADI8_G 0A0C3
DEFINE LOADI8_GE 0A0A3
DEFINE LOADI8_EQUAL 0A003
DEFINE LOADI8_NE 0A013
DEFINE LOADI8_LE 0A0D3
DEFINE LOADI8_L 0A0B3
DEFINE LOADI8_HI 0A083
DEFINE LOADI8_HS 0A023
DEFINE LOADI8_LS 0A093
DEFINE LOADI8_LO 0A033
# JUMP/BRANCH
DEFINE JUMP_ALWAYS EA
DEFINE JUMP_EQUAL 0A
DEFINE JUMP_NE 1A
DEFINE CALL_ALWAYS EB
DEFINE CALL_REG_ALWAYS FF2FE1
DEFINE RETURN FF2FE1
# Data movement
DEFINE MOVE_ALWAYS A0E1
DEFINE MVN_ALWAYS 0E0E1
DEFINE MVN_LT 0E0B1
DEFINE MVNI8_EQUAL 0E003
DEFINE PUSH_ALWAYS 2DE9
DEFINE POP_ALWAYS BDE8
# Arithmetic/logic
DEFINE AUX_ALWAYS E1
DEFINE IMM_ALWAYS E3
DEFINE ARITH_ALWAYS E2
DEFINE ARITH_GE A2
DEFINE ARITH_LT B2
DEFINE ARITH_NE 12
DEFINE ARITH2_ALWAYS E0
DEFINE ARITH2_GE A0
DEFINE ADC 0A
DEFINE ADCS 0B
DEFINE ADD 08
DEFINE ADDS 09
DEFINE AND 00
DEFINE CMP 005
DEFINE CMPI8 005
DEFINE MUL 0
DEFINE MULS 1
DEFINE OR 08
DEFINE SHIFT A0
DEFINE SUB 04
DEFINE RSUB 06
DEFINE XOR 02
# SYSCALL
DEFINE SYSCALL_ALWAYS 000000EF
## Copyright (C) 2016 Jeremiah Orians
## This file is part of M2-Planet.
##
## M2-Planet is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## M2-Planet is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with M2-Planet. If not, see <http://www.gnu.org/licenses/>.
:_start
'0' SP BP NO_SHIFT MOVE_ALWAYS ; Setup Base Pointer
;; Prepare argv
!4 R0 ADD BP ARITH_ALWAYS ; ARGV_address = BP + 4
{R0} PUSH_ALWAYS ; Put argv on the stack
;; Prepare envp
'0' BP R0 NO_SHIFT MOVE_ALWAYS ; Address we need to load from
!0 R0 LOAD32 R0 MEMORY ; Get ARGC
!2 R0 ADD R0 ARITH_ALWAYS ; OFFSET = ARGC + 2
'0' R0 R0 '1' MOVE_ALWAYS ; OFFSET = OFFSET * WORDSIZE
'0' R0 R0 ADD BP ARITH2_ALWAYS ; ENVP_address = BP + OFFSET
{R0} PUSH_ALWAYS ; Put envp on the stack
;; Stack offset
!4 BP ADD BP ARITH_ALWAYS ; Fix BP
^~FUNCTION___init_malloc CALL_ALWAYS ; Setup for malloc
^~FUNCTION___init_io CALL_ALWAYS ; Setup for FILE*
^~FUNCTION_main CALL_ALWAYS ; Jump right into main
{R1} POP_ALWAYS ; Fix stack
{R1} POP_ALWAYS ; Fix stack
{R1} POP_ALWAYS ; Fix stack
{R0} PUSH_ALWAYS ; put return on the stack
{R0} PUSH_ALWAYS ; So that _exit will have it
{R0} PUSH_ALWAYS ; So that _exit will have it
:FUNCTION_exit
^~FUNCTION___kill_io CALL_ALWAYS
:FUNCTION__exit
!4 R0 SUB R12 ARITH_ALWAYS
!0 R0 LOAD32 R0 MEMORY
!1 R7 LOADI8_ALWAYS
SYSCALL_ALWAYS ; exit
# Unsigned Divide
:divide
{R4} PUSH_ALWAYS ; Protect R4
{R3} PUSH_ALWAYS ; Protect R3
{R2} PUSH_ALWAYS ; Protect R2
'0' R0 R3 NO_SHIFT MOVE_ALWAYS ; MOV R3,R0
'0' R1 R2 NO_SHIFT MOVE_ALWAYS ; MOV R2,R1
!0 R0 LOADI8_ALWAYS ; MOV R0,#0
!0 CMPI8 R2 IMM_ALWAYS ; CMP R2,#0
!1 R0 SUB R0 ARITH_LT ; SUBLT R0,R0,#1
!0 CMPI8 R3 IMM_ALWAYS ; CMP R3,#0
!0 R3 RSUB R3 ARITH_LT ; RSBLT R3,R3,#0
'0' R0 R0 MVN_LT ; MVNLT R0,R0
'0' R0 R4 NO_SHIFT MOVE_ALWAYS ; MOV R4,R0
!32 R0 LOADI8_ALWAYS ; MOV R0,#32.
!0 R1 LOADI8_ALWAYS ; MOV R1,#0
:divide_loop
'0' R2 R2 ADDS R2 ARITH2_ALWAYS ; ADDS R2,R2,R2
'0' R1 R1 ADCS R1 ARITH2_ALWAYS ; ADCS R1,R1,R1
'0' R3 CMP R1 AUX_ALWAYS ; CMP R1,R3
'0' R3 R1 SUB R1 ARITH2_GE ; SUBGE R1,R1,R3
!1 R2 ADD R2 ARITH_GE ; ADDGE R2,R2,#1
!1 R0 SUB R0 ARITH_ALWAYS ; SUB R0,R0,#1
!0 CMPI8 R0 IMM_ALWAYS ; CMP R0,#0
^~divide_loop JUMP_NE ; BNE loop
'0' R2 R0 NO_SHIFT MOVE_ALWAYS ; MOV R0,R2
{R2} POP_ALWAYS ; Restore R2
{R3} POP_ALWAYS ; Restore R3
{R4} POP_ALWAYS ; Restore R4
'1' LR RETURN
# Signed Divide
:divides
{R4} PUSH_ALWAYS ; Protect R4
{R3} PUSH_ALWAYS ; Protect R3
{R2} PUSH_ALWAYS ; Protect R2
'0' R0 R3 NO_SHIFT MOVE_ALWAYS ; MOV R3,R0
'0' R1 R2 NO_SHIFT MOVE_ALWAYS ; MOV R2,R1
!0 R0 LOADI8_ALWAYS ; MOV R0,#0
!0 CMPI8 R2 IMM_ALWAYS ; CMP R2,#0
!0 R2 RSUB R2 ARITH_LT ; RSBLT R2,R2,#0
!1 R0 SUB R0 ARITH_LT ; SUBLT R0,R0,#1
!0 CMPI8 R3 IMM_ALWAYS ; CMP R3,#0
!0 R3 RSUB R3 ARITH_LT ; RSBLT R3,R3,#0
'0' R0 R0 MVN_LT ; MVNLT R0,R0
'0' R0 R4 NO_SHIFT MOVE_ALWAYS ; MOV R4,R0
!32 R0 LOADI8_ALWAYS ; MOV R0,#32.
!0 R1 LOADI8_ALWAYS ; MOV R1,#0
:divides_loop
'0' R2 R2 ADDS R2 ARITH2_ALWAYS ; ADDS R2,R2,R2
'0' R1 R1 ADCS R1 ARITH2_ALWAYS ; ADCS R1,R1,R1
'0' R3 CMP R1 AUX_ALWAYS ; CMP R1,R3
'0' R3 R1 SUB R1 ARITH2_GE ; SUBGE R1,R1,R3
!1 R2 ADD R2 ARITH_GE ; ADDGE R2,R2,#1
!1 R0 SUB R0 ARITH_ALWAYS ; SUB R0,R0,#1
!0 CMPI8 R0 IMM_ALWAYS ; CMP R0,#0
^~divides_loop JUMP_NE ; BNE loop
!0 CMPI8 R4 IMM_ALWAYS ; CMP R4,#0
!0 R2 RSUB R2 ARITH_NE ; RSBNE R2,R2,#0
'0' R2 R0 NO_SHIFT MOVE_ALWAYS ; MOV R0,R2
{R2} POP_ALWAYS ; Restore R2
{R3} POP_ALWAYS ; Restore R3
{R4} POP_ALWAYS ; Restore R4
'1' LR RETURN
# Unsigned Modulus
:modulus
{LR} PUSH_ALWAYS ; Prepare to leverage divide
^~divide CALL_ALWAYS ; Use divide
'0' R1 R0 NO_SHIFT MOVE_ALWAYS ; MOV R0,R1
{LR} POP_ALWAYS ; Prepare for return
'1' LR RETURN
# Signed Modulus
:moduluss
{LR} PUSH_ALWAYS ; Prepare to leverage divide
^~divides CALL_ALWAYS ; Use divides
'0' R1 R0 NO_SHIFT MOVE_ALWAYS ; MOV R0,R1
{LR} POP_ALWAYS ; Prepare for return
'1' LR RETURN
:GLOBAL__envp
NULL
:mystring
"this is my string\n"