Compilation strategy, prototype JIT
This commit is contained in:
parent
00e4962605
commit
59a8f58fcd
151
README.md
151
README.md
|
|
@ -177,20 +177,31 @@ assembly for `simple.c` to see how it works:
|
||||||
$ gcc -S simple.c
|
$ gcc -S simple.c
|
||||||
```
|
```
|
||||||
|
|
||||||
Edited/annotated highlights from the assembly `simple.s` (whose floating-point
|
Edited/annotated highlights from the assembly `simple.s`, which is much more
|
||||||
code is a little circuitous):
|
complicated than what we'll end up generating:
|
||||||
|
|
||||||
```s
|
```s
|
||||||
interpret:
|
interpret:
|
||||||
|
// The stack contains local variables referenced to the "base pointer"
|
||||||
|
// stored in hardware register %rbp. Here's the layout:
|
||||||
|
//
|
||||||
|
// double i = -8(%rbp)
|
||||||
|
// double r = -16(%rbp)
|
||||||
|
// src = -24(%rbp)
|
||||||
|
// dst = -32(%rbp)
|
||||||
|
// registers = -40(%rbp) <- comes in as an argument in %rdi
|
||||||
|
// code = -48(%rbp) <- comes in as an argument in %rsi
|
||||||
|
|
||||||
pushq %rbp
|
pushq %rbp
|
||||||
movq %rsp, %rbp // standard x86-64 function header
|
movq %rsp, %rbp // standard x86-64 function header
|
||||||
subq $48, %rsp // allocate space for local variables
|
subq $48, %rsp // allocate space for six local vars
|
||||||
movq %rdi, -40(%rbp) // callee saves %rsi and %rdi
|
movq %rdi, -40(%rbp) // registers arg -> local var
|
||||||
movq %rsi, -48(%rbp)
|
movq %rsi, -48(%rbp) // code arg -> local var
|
||||||
jmp for_loop_condition
|
jmp for_loop_condition // commence loopage
|
||||||
|
|
||||||
for_loop_body:
|
for_loop_body:
|
||||||
<a bunch of stuff>
|
// (a bunch of stuff to set up *src and *dst)
|
||||||
|
|
||||||
cmpl $43, %eax // case '+'
|
cmpl $43, %eax // case '+'
|
||||||
je add_branch
|
je add_branch
|
||||||
cmpl $61, %eax // case '='
|
cmpl $61, %eax // case '='
|
||||||
|
|
@ -272,11 +283,125 @@ for_loop_step:
|
||||||
addq $3, -48(%rbp)
|
addq $3, -48(%rbp)
|
||||||
|
|
||||||
for_loop_condition:
|
for_loop_condition:
|
||||||
movq -48(%rbp), %rax
|
movq -48(%rbp), %rax // %rax = code (the pointer)
|
||||||
movzbl (%rax), %eax
|
movzbl (%rax), %eax // %eax = *code (move one byte)
|
||||||
testb %al, %al
|
testb %al, %al // is %eax 0?
|
||||||
jne .L8
|
jne for_loop_body // if no, then continue
|
||||||
nop
|
|
||||||
leave // reset %rsp
|
leave // otherwise rewind stack
|
||||||
ret // pop and jmp
|
ret // pop and jmp
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Compilation strategy
|
||||||
|
Most of the above is register-shuffling fluff that we can get rid of. We're
|
||||||
|
compiling the code up front, which means all of our register addresses are
|
||||||
|
known quantities and we won't need any unknown indirection at runtime. So all
|
||||||
|
of the shuffling into and out of `%rax` can be replaced by a much simpler move
|
||||||
|
directly to or from `N(%rdi)` -- since `%rdi` is the argument that points to
|
||||||
|
the first register's real component.
|
||||||
|
|
||||||
|
If you haven't already, at this point I'd recommend downloading the [Intel
|
||||||
|
software developer's
|
||||||
|
manual](https://software.intel.com/en-us/articles/intel-sdm), of which volume 2
|
||||||
|
describes the semantics and machine code representation of every instruction.
|
||||||
|
|
||||||
|
**NOTE:** GCC uses AT&T assembly syntax, whereas the Intel manuals use Intel
|
||||||
|
assembly syntax. An important difference is that AT&T reverses the arguments:
|
||||||
|
`mov %rax, %rbx` (AT&T syntax) assigns to `%rbx`, whereas `mov rax, rbx` (Intel
|
||||||
|
syntax) assigns to `rax`. All of my code examples use AT&T, and none of this
|
||||||
|
will matter once we're working with machine code.
|
||||||
|
|
||||||
|
##### Example: the Mandelbrot function `*bb+ab`
|
||||||
|
```s
|
||||||
|
// Step 1: multiply register B by itself
|
||||||
|
movsd 16(%rdi), %xmm0 // %xmm0 = b.r
|
||||||
|
movsd 24(%rdi), %xmm1 // %xmm1 = b.i
|
||||||
|
movsd 16(%rdi), %xmm2 // %xmm2 = b.r
|
||||||
|
movsd 24(%rdi), %xmm3 // %xmm3 = b.i
|
||||||
|
movsd %xmm0, %xmm4 // %xmm4 = b.r
|
||||||
|
mulsd %xmm2, %xmm4 // %xmm4 = b.r*b.r
|
||||||
|
movsd %xmm1, %xmm5 // %xmm5 = b.i
|
||||||
|
mulsd %xmm3, %xmm5 // %xmm5 = b.i*b.i
|
||||||
|
subsd %xmm5, %xmm4 // %xmm4 = b.r*b.r - b.i*b.i
|
||||||
|
movsd %xmm4, 16(%rdi) // b.r = %xmm4
|
||||||
|
|
||||||
|
mulsd %xmm0, %xmm3 // %xmm3 = b.r*b.i
|
||||||
|
mulsd %xmm1, %xmm2 // %xmm2 = b.i*b.r
|
||||||
|
addsd %xmm3, %xmm2 // %xmm2 = b.r*b.i + b.i*b.r
|
||||||
|
movsd %xmm2, 24(%rdi) // b.i = %xmm2
|
||||||
|
|
||||||
|
// Step 2: add register A to register B
|
||||||
|
movpd (%rdi), %xmm0 // %xmm0 = (a.r, a.i)
|
||||||
|
addpd %xmm0, 16(%rdi) // (b.r, b.i) += %xmm0
|
||||||
|
```
|
||||||
|
|
||||||
|
The multiplication code isn't optimized for the squaring-a-register use case;
|
||||||
|
instead, I left it fully general so we can use it as a template when we start
|
||||||
|
generating machine code.
|
||||||
|
|
||||||
|
### JIT mechanics
|
||||||
|
Rather than compiling a real language, let's just get a basic JIT setup.
|
||||||
|
|
||||||
|
```c
|
||||||
|
// jitproto.c
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
|
typedef long(*fn)(long);
|
||||||
|
|
||||||
|
fn compile_identity(void) {
|
||||||
|
// Allocate some memory and set its permissions correctly. In particular, we
|
||||||
|
// need PROT_EXEC (which isn't normally enabled for data memory, e.g. from
|
||||||
|
// malloc()), which tells the processor it's ok to execute it as machine
|
||||||
|
// code.
|
||||||
|
char *memory = mmap(NULL, // address
|
||||||
|
4096, // size
|
||||||
|
PROT_READ | PROT_WRITE | PROT_EXEC,
|
||||||
|
MAP_PRIVATE | MAP_ANONYMOUS,
|
||||||
|
-1, // fd (not used here)
|
||||||
|
0); // offset (not used here)
|
||||||
|
if (!memory) {
|
||||||
|
perror("failed to allocate memory");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
|
||||||
|
// mov %rdi, %rax
|
||||||
|
memory[i++] = 0x48; // REX.W prefix
|
||||||
|
memory[i++] = 0x8b; // MOV opcode, register/register
|
||||||
|
memory[i++] = 0xc7; // MOD/RM byte for %rdi -> %rax
|
||||||
|
|
||||||
|
// ret
|
||||||
|
memory[i++] = 0xc3; // RET opcode
|
||||||
|
|
||||||
|
return (long(*)(long)) memory;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
fn f = compile_identity();
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < 10; ++i)
|
||||||
|
printf("f(%d) = %ld\n", i, (*f)(i));
|
||||||
|
munmap((void*) f, 4096);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This does what we expect: we've just produced an identity function.
|
||||||
|
|
||||||
|
```sh
|
||||||
|
$ gcc jitproto.c -o jitproto
|
||||||
|
$ ./jitproto
|
||||||
|
f(0) = 0
|
||||||
|
f(1) = 1
|
||||||
|
f(2) = 2
|
||||||
|
f(3) = 3
|
||||||
|
f(4) = 4
|
||||||
|
f(5) = 5
|
||||||
|
f(6) = 6
|
||||||
|
f(7) = 7
|
||||||
|
f(8) = 8
|
||||||
|
f(9) = 9
|
||||||
|
```
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
// jitproto.c
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
|
typedef long(*fn)(long);
|
||||||
|
|
||||||
|
fn compile_identity(void) {
|
||||||
|
// Allocate some memory and set its permissions correctly. In particular, we
|
||||||
|
// need PROT_EXEC (which isn't normally enabled for data memory, e.g. from
|
||||||
|
// malloc()), which tells the processor it's ok to execute it as machine
|
||||||
|
// code.
|
||||||
|
char *memory = mmap(NULL, // address
|
||||||
|
4096, // size
|
||||||
|
PROT_READ | PROT_WRITE | PROT_EXEC,
|
||||||
|
MAP_PRIVATE | MAP_ANONYMOUS,
|
||||||
|
-1, // fd (not used here)
|
||||||
|
0); // offset (not used here)
|
||||||
|
if (!memory) {
|
||||||
|
perror("failed to allocate memory");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
|
||||||
|
// mov %rdi, %rax
|
||||||
|
memory[i++] = 0x48; // REX.W prefix
|
||||||
|
memory[i++] = 0x8b; // MOV opcode, register/register
|
||||||
|
memory[i++] = 0xc7; // MOD/RM byte for %rsi -> %rax
|
||||||
|
|
||||||
|
// ret
|
||||||
|
memory[i++] = 0xc3; // RET opcode
|
||||||
|
|
||||||
|
return (long(*)(long)) memory;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
fn f = compile_identity();
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < 10; ++i)
|
||||||
|
printf("f(%d) = %ld\n", i, (*f)(i));
|
||||||
|
munmap((void*) f, 4096);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue