Hard-coded version: 2x faster

This commit is contained in:
Spencer Tipping 2017-03-08 11:27:57 -07:00
parent d216bfc20a
commit dc16dde1f4
3 changed files with 124 additions and 20 deletions

View File

@ -22,8 +22,9 @@ To keep it simple, our processor has four complex-valued registers called `a`,
For each pixel, the interpreter will zero all of the registers and then set `a` For each pixel, the interpreter will zero all of the registers and then set `a`
to the current pixel's coordinates. It then iterates the machine code for up to to the current pixel's coordinates. It then iterates the machine code for up to
255 iterations waiting for register `b` to "overflow" (i.e. for its complex 256 iterations waiting for register `b` to "overflow" (i.e. for its complex
absolute value to exceed 2). absolute value to exceed 2). That means that the code for a standard Mandelbrot
set is `*bb+ab`.
### Simple interpreter ### Simple interpreter
The first thing to do is write up a bare-bones interpreter in C. It would be The first thing to do is write up a bare-bones interpreter in C. It would be
@ -72,17 +73,18 @@ void interpret(complex *registers, char const *code) {
int main(int argc, char **argv) { int main(int argc, char **argv) {
complex registers[4]; complex registers[4];
int i, x, y; int i, x, y;
printf("P2\n%d %d\n%d\n", 800, 800, 255); char line[1600];
for (y = 0; y < 800; ++y) { printf("P5\n%d %d\n%d\n", 1600, 900, 255);
for (x = 0; x < 800; ++x) { for (y = 0; y < 900; ++y) {
registers[0].r = -2 + 4 * (x / 800.0); for (x = 0; x < 1600; ++x) {
registers[0].i = -2 + 4 * (y / 800.0); registers[0].r = 2 * 1.6 * (x / 1600.0 - 0.5);
registers[0].i = 2 * 0.9 * (y / 900.0 - 0.5);
for (i = 1; i < 4; ++i) registers[i].r = registers[i].i = 0; for (i = 1; i < 4; ++i) registers[i].r = registers[i].i = 0;
for (i = 0; i < 255 && sqr(registers[1].r) + sqr(registers[1].i) < 4; ++i) for (i = 0; i < 256 && sqr(registers[1].r) + sqr(registers[1].i) < 4; ++i)
interpret(registers, argv[1]); interpret(registers, argv[1]);
printf(" %d", 255 - i); line[x] = i;
} }
printf("\n"); fwrite(line, 1, sizeof(line), stdout);
} }
return 0; return 0;
} }
@ -94,7 +96,70 @@ Now we can see the results by using `display` from ImageMagick
```sh ```sh
$ gcc simple.c -o simple $ gcc simple.c -o simple
$ ./simple *bb+ab | display - # imagemagick version $ ./simple *bb+ab | display - # imagemagick version
$ ./simple *bb+ab > output.ppm # save a grayscale PPM image $ ./simple *bb+ab > output.pgm # save a grayscale PGM image
$ time ./simple *bb+ab > /dev/null # quick benchmark
real 0m2.369s
user 0m2.364s
sys 0m0.000s
$
``` ```
![image](http://storage8.static.itmages.com/i/17/0308/h_1488995804_6848135_3dd0ab2cdf.jpeg) ![image](http://storage2.static.itmages.com/i/17/0308/h_1488996910_5153802_e6927d8be0.jpeg)
### Performance analysis
JIT can basically eliminate the interpreter overhead, which we can easily model
here by replacing `interpret()` with a hard-coded Mandelbrot calculation. This
will provide an upper bound on realistic JIT performance, since we're unlikely
to optimize as well as `gcc` does.
```c
// hardcoded.c
#include <stdio.h>
#include <stdlib.h>
/* Squares its argument; the argument is evaluated twice. */
#define sqr(x) ((x) * (x))

/* A complex-valued register: real part r, imaginary part i. */
typedef struct {
  double r;
  double i;
} complex;

/*
 * Hard-coded replacement for the bytecode interpreter: performs one step of
 * b = b*b + a, where a is registers[0] and b is registers[1].
 *
 * registers: register file; only elements 0 and 1 are accessed, and only
 *            element 1 is written.
 * code:      ignored here; the parameter is kept so the function can be
 *            called with the same arguments as before.
 */
void interpret(complex *registers, char const *code) {
  const double a_re = registers[0].r;
  const double a_im = registers[0].i;
  const double b_re = registers[1].r;
  const double b_im = registers[1].i;

  /* Complex square of b, computed from the old value of b throughout. */
  const double sq_re = b_re * b_re - b_im * b_im;
  const double sq_im = b_re * b_im + b_im * b_re;

  (void) code;

  registers[1].r = sq_re + a_re;
  registers[1].i = sq_im + a_im;
}
/*
 * Writes a 1600x900 binary PGM (P5) image to stdout, one row at a time.
 *
 * For every pixel, register 0 is set to the pixel's coordinates, registers
 * 1..3 are zeroed, and interpret() is applied until register 1 reaches a
 * squared magnitude of at least 4 or 256 iterations have run. The iteration
 * count becomes the pixel's gray level.
 *
 * argv[1] is forwarded to interpret() as the program text (NULL when absent).
 * Returns 0 on success, EXIT_FAILURE if writing to stdout fails.
 */
int main(int argc, char **argv) {
  enum { WIDTH = 1600, HEIGHT = 900, MAX_ITER = 256 };
  /* Do not read argv[1] unless the caller actually supplied it. */
  char const *code = argc > 1 ? argv[1] : NULL;
  complex registers[4];
  /* Raw bytes: unsigned char keeps the int -> byte conversion well-defined. */
  unsigned char line[WIDTH];
  int i, x, y;

  if (printf("P5\n%d %d\n%d\n", WIDTH, HEIGHT, 255) < 0) return EXIT_FAILURE;
  for (y = 0; y < HEIGHT; ++y) {
    for (x = 0; x < WIDTH; ++x) {
      registers[0].r = 2 * 1.6 * (x / (double) WIDTH - 0.5);
      registers[0].i = 2 * 0.9 * (y / (double) HEIGHT - 0.5);
      for (i = 1; i < 4; ++i) registers[i].r = registers[i].i = 0;
      for (i = 0; i < MAX_ITER && sqr(registers[1].r) + sqr(registers[1].i) < 4; ++i)
        interpret(registers, code);
      /* i ranges over 1..256; a count of 256 (never overflowed) wraps to 0. */
      line[x] = (unsigned char) i;
    }
    if (fwrite(line, 1, sizeof line, stdout) != sizeof line) return EXIT_FAILURE;
  }
  /* Surface buffered write errors (e.g. closed pipe, full disk). */
  return fflush(stdout) == 0 ? 0 : EXIT_FAILURE;
}
```
This version runs about twice as fast as the simple interpreter:
```sh
$ gcc hardcoded.c -o hardcoded
$ time ./hardcoded *bb+ab > /dev/null
real 0m1.329s
user 0m1.328s
sys 0m0.000s
$
```

38
hardcoded.c Normal file
View File

@ -0,0 +1,38 @@
// hardcoded.c
#include <stdio.h>
#include <stdlib.h>
/* Squares its argument; the argument is evaluated twice. */
#define sqr(x) ((x) * (x))

/* A complex-valued register: real part r, imaginary part i. */
typedef struct {
  double r;
  double i;
} complex;

/*
 * Hard-coded replacement for the bytecode interpreter: performs one step of
 * b = b*b + a, where a is registers[0] and b is registers[1].
 *
 * registers: register file; only elements 0 and 1 are accessed, and only
 *            element 1 is written.
 * code:      ignored here; the parameter is kept so the function can be
 *            called with the same arguments as before.
 */
void interpret(complex *registers, char const *code) {
  const double a_re = registers[0].r;
  const double a_im = registers[0].i;
  const double b_re = registers[1].r;
  const double b_im = registers[1].i;

  /* Complex square of b, computed from the old value of b throughout. */
  const double sq_re = b_re * b_re - b_im * b_im;
  const double sq_im = b_re * b_im + b_im * b_re;

  (void) code;

  registers[1].r = sq_re + a_re;
  registers[1].i = sq_im + a_im;
}
/*
 * Writes a 1600x900 binary PGM (P5) image to stdout, one row at a time.
 *
 * For every pixel, register 0 is set to the pixel's coordinates, registers
 * 1..3 are zeroed, and interpret() is applied until register 1 reaches a
 * squared magnitude of at least 4 or 256 iterations have run. The iteration
 * count becomes the pixel's gray level.
 *
 * argv[1] is forwarded to interpret() as the program text (NULL when absent).
 * Returns 0 on success, EXIT_FAILURE if writing to stdout fails.
 */
int main(int argc, char **argv) {
  enum { WIDTH = 1600, HEIGHT = 900, MAX_ITER = 256 };
  /* Do not read argv[1] unless the caller actually supplied it. */
  char const *code = argc > 1 ? argv[1] : NULL;
  complex registers[4];
  /* Raw bytes: unsigned char keeps the int -> byte conversion well-defined. */
  unsigned char line[WIDTH];
  int i, x, y;

  if (printf("P5\n%d %d\n%d\n", WIDTH, HEIGHT, 255) < 0) return EXIT_FAILURE;
  for (y = 0; y < HEIGHT; ++y) {
    for (x = 0; x < WIDTH; ++x) {
      registers[0].r = 2 * 1.6 * (x / (double) WIDTH - 0.5);
      registers[0].i = 2 * 0.9 * (y / (double) HEIGHT - 0.5);
      for (i = 1; i < 4; ++i) registers[i].r = registers[i].i = 0;
      for (i = 0; i < MAX_ITER && sqr(registers[1].r) + sqr(registers[1].i) < 4; ++i)
        interpret(registers, code);
      /* i ranges over 1..256; a count of 256 (never overflowed) wraps to 0. */
      line[x] = (unsigned char) i;
    }
    if (fwrite(line, 1, sizeof line, stdout) != sizeof line) return EXIT_FAILURE;
  }
  /* Surface buffered write errors (e.g. closed pipe, full disk). */
  return fflush(stdout) == 0 ? 0 : EXIT_FAILURE;
}

View File

@ -37,17 +37,18 @@ void interpret(complex *registers, char const *code) {
int main(int argc, char **argv) { int main(int argc, char **argv) {
complex registers[4]; complex registers[4];
int i, x, y; int i, x, y;
printf("P2\n%d %d\n%d\n", 800, 800, 255); char line[1600];
for (y = 0; y < 800; ++y) { printf("P5\n%d %d\n%d\n", 1600, 900, 255);
for (x = 0; x < 800; ++x) { for (y = 0; y < 900; ++y) {
registers[0].r = -2 + 4 * (x / 800.0); for (x = 0; x < 1600; ++x) {
registers[0].i = -2 + 4 * (y / 800.0); registers[0].r = 2 * 1.6 * (x / 1600.0 - 0.5);
registers[0].i = 2 * 0.9 * (y / 900.0 - 0.5);
for (i = 1; i < 4; ++i) registers[i].r = registers[i].i = 0; for (i = 1; i < 4; ++i) registers[i].r = registers[i].i = 0;
for (i = 0; i < 255 && sqr(registers[1].r) + sqr(registers[1].i) < 4; ++i) for (i = 0; i < 256 && sqr(registers[1].r) + sqr(registers[1].i) < 4; ++i)
interpret(registers, argv[1]); interpret(registers, argv[1]);
printf(" %d", 255 - i); line[x] = i;
} }
printf("\n"); fwrite(line, 1, sizeof(line), stdout);
} }
return 0; return 0;
} }