// Compresses files using a backwards LZ77 algorithm.
// This is used to compress code after linking so that the compressed code matches the base ROM.
//
// The input is processed from its last byte towards its first. The output is a
// sequence of blocks, each made of one flags byte followed (at lower addresses)
// by up to eight tokens. A token is either a literal byte (flag bit 0) or a
// 16-bit little-endian length-distance pair (flag bit 1). An 8-byte footer is
// appended after the compressed data.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define VERSION "1.0"

#ifndef min
#  define min(a,b) ((a) < (b) ? (a) : (b))
#endif

// Little-endian stores/loads on a byte buffer, independent of host byte order.
#define WRITE16(buf,val) do { (buf)[0] = (val) & 0xFF; (buf)[1] = ((val) >> 8) & 0xFF; } while (0)
#define WRITE24(buf,val) do { (buf)[0] = (val) & 0xFF; (buf)[1] = ((val) >> 8) & 0xFF; (buf)[2] = ((val) >> 16) & 0xFF; } while (0)
#define WRITE32(buf,val) do { (buf)[0] = (val) & 0xFF; (buf)[1] = ((val) >> 8) & 0xFF; (buf)[2] = ((val) >> 16) & 0xFF; (buf)[3] = ((val) >> 24) & 0xFF; } while (0)
#define READ16(buf) ((buf)[0] | ((buf)[1] << 8))
#define READ24(buf) ((buf)[0] | ((buf)[1] << 8) | ((buf)[2] << 16))
#define READ32(buf) ((buf)[0] | ((buf)[1] << 8) | ((buf)[2] << 16) | ((buf)[3] << 24))

// Layout of a length-distance pair: the upper LEN_BITS hold (length - MIN_SUBSEQ),
// the lower DIST_BITS hold the encoded distance.
#define LEN_BITS 4
#define DIST_BITS 12
#define MIN_SUBSEQ 3
#define LEN_MASK ((1 << (LEN_BITS)) - 1)
#define DIST_MASK ((1 << (DIST_BITS)) - 1)
#define MAX_SUBSEQ ((MIN_SUBSEQ) + LEN_MASK)
#define LOOKAHEAD (1 << (DIST_BITS))

// Snapshot taken at the end of each block of tokens.
typedef struct {
    uint8_t *pos;             // value of the write cursor when the block ended
    uint32_t totalBytesSaved; // running bytesSaved total at that point (stored as unsigned)
} BlockInfo;

// Searches the bytes after `buf` for the longest run that matches the bytes
// ending at `buf`, comparing backwards (towards lower addresses).
//
//   buf    current position; must satisfy start <= buf < end
//   start  first byte of the input
//   end    one past the last byte of the input
//   pLen   receives the match length (MIN_SUBSEQ..MAX_SUBSEQ) on success
//   pDist  receives the encoded distance (offset between the two runs minus
//          MIN_SUBSEQ, at most DIST_MASK) on success
//
// Returns true when a match of at least MIN_SUBSEQ bytes was found; otherwise
// returns false and leaves *pLen and *pDist untouched.
bool FindSubsequence(const uint8_t *buf, const uint8_t *start, const uint8_t *end, size_t *pLen, size_t *pDist) {
    size_t bestLen = 0;
    size_t bestDist = 0;
    size_t maxLookahead = min(LOOKAHEAD + MAX_SUBSEQ, end - buf - 1);
    // Number of bytes available below buf. Comparing against this count avoids
    // ever forming a pointer that lies before `start`.
    const size_t backRoom = (size_t)(buf - start);

    for (size_t i = 0; i < maxLookahead; ++i) {
        const uint8_t *needle = buf;
        const uint8_t *haystack = buf + 1 + i;
        if (*needle != *haystack) {
            continue;
        }

        // Extend the match backwards. The run read through `haystack` must stay
        // strictly above buf, and the length is capped at MAX_SUBSEQ.
        size_t len = 0;
        while (len <= backRoom
            && *(needle - len) == *(haystack - len)
            && haystack - len > buf
            && len < MAX_SUBSEQ) {
            ++len;
        }

        // For offsets below MIN_SUBSEQ this wraps to a huge value and is rejected
        // by the DIST_MASK comparison below.
        size_t dist = haystack - needle - MIN_SUBSEQ;
        if (len > bestLen && dist <= DIST_MASK) {
            bestLen = len;
            bestDist = dist;
        }
    }

    if (bestLen < MIN_SUBSEQ) {
        return false;
    }
    *pLen = bestLen;
    *pDist = bestDist;
    return true;
}

// Compresses `size` bytes at `src`, filling `dst` from its end downwards.
//
//   src            input bytes
//   dst            output area of `size` bytes. Data that does not compress
//                  grows by one flags byte per eight literals, in which case
//                  output is written BELOW dst; the caller must therefore
//                  provide at least size / 8 + 2 writable bytes in front of dst.
//   size           number of input bytes
//   pResult        receives a pointer to the first byte of the output
//   pLen           receives the number of output bytes, running from *pResult
//                  up to dst + size
//   pNumIdentical  receives how many bytes at the start of the output are a
//                  verbatim copy of the start of the input
//
// Returns false if the bookkeeping array cannot be allocated, true otherwise.
bool Compress(const uint8_t *src, uint8_t *dst, size_t size, uint8_t **pResult, size_t *pLen, size_t *pNumIdentical) {
    if (size == 0) {
        // Nothing to encode. Handled up front so that no pointer before dst is
        // formed and the (empty) block list is never inspected.
        *pResult = dst;
        *pLen = 0;
        *pNumIdentical = 0;
        return true;
    }

    const uint8_t *end = src + size;
    const uint8_t *read = end;
    uint8_t *writeEnd = dst + size;
    uint8_t *write = writeEnd - 1;
    uint8_t *pFlags = write;   // slot reserved for the current block's flags byte
    uint8_t flags = 0;
    size_t flagCount = 0;
    int32_t bytesSaved = 0;

    // Every token consumes at least one input byte, so there can never be more
    // than ceil(size / 8) blocks.
    BlockInfo *blockInfos = malloc(((size + 7) / 8) * sizeof(BlockInfo));
    if (blockInfos == NULL) {
        return false;
    }
    size_t numBlocks = 0;

    while (read > src) {
        flags <<= 1;

        size_t len, dist;
        if (FindSubsequence(read - 1, src, end, &len, &dist)) {
            // write length-distance pair
            size_t pair = (((len - MIN_SUBSEQ) & LEN_MASK) << DIST_BITS) | (dist & DIST_MASK);
            read -= len;
            write -= 2;
            WRITE16(write, pair);
            flags |= 1;
            bytesSaved += (int32_t)len - 2;
        } else {
            // write literal
            write -= 1;
            read -= 1;
            *write = *read;
        }

        flagCount++;
        if (flagCount == 8) {
            // Block complete: store its flags and reserve the next flags slot.
            flagCount = 0;
            *pFlags = flags;
            bytesSaved -= 1;
            flags = 0;
            write -= 1;
            pFlags = write;

            blockInfos[numBlocks].pos = write;
            blockInfos[numBlocks].totalBytesSaved = (uint32_t)bytesSaved;
            numBlocks++;
        }
    }

    if (flagCount != 0) {
        // Partial final block: left-align the flags that were produced.
        blockInfos[numBlocks].pos = write;
        blockInfos[numBlocks].totalBytesSaved = (uint32_t)bytesSaved;
        numBlocks++;
        *pFlags = flags << (8 - flagCount);
    } else {
        // The flags slot reserved for a next block was never used.
        write += 1;
    }

    // Look for the first block after which no further bytes were saved; what
    // follows it in the output is replaced by a verbatim copy of the input.
    // `i + 1 < numBlocks` is the wrap-free form of `i < numBlocks - 1`.
    size_t numIdentical = 0;
    for (size_t i = 0; i + 1 < numBlocks; ++i) {
        if (blockInfos[i].totalBytesSaved == (uint32_t)bytesSaved) {
            numIdentical = blockInfos[i].pos - write;
            write += 1;
            memcpy(write, src, numIdentical);
            // Extend the count over following bytes that already equal the
            // input, without running off the end of either buffer.
            while (write + numIdentical < writeEnd
                && read + numIdentical < end
                && write[numIdentical] == read[numIdentical]) {
                numIdentical += 1;
            }
            break;
        }
    }

    free(blockInfos);

    *pResult = write;
    *pLen = size - (write - dst);
    *pNumIdentical = numIdentical;
    return true;
}

// Writes optional 0xFF padding followed by the 8-byte footer, all little-endian:
//   3 bytes  compressed size (payload + padding + footer, minus numIdentical)
//   1 byte   read offset (padding + footer size)
//   4 bytes  write offset (originalSize - start - payload - padding - footer)
//
//   fp              destination stream
//   fileName        name used in error messages; not used when fp is stdout
//   addPadding      pad the payload with 0xFF up to a multiple of 4 bytes
//   compressedSize  number of payload bytes already written
//   originalSize    size of the whole input file
//   start           number of leading input bytes that were copied uncompressed
//   numIdentical    value reported by Compress
//
// Returns false, after printing a message to stderr, if any write fails.
bool WriteFooter(FILE *fp, const char *fileName, bool addPadding, uint32_t compressedSize, uint32_t originalSize, uint32_t start, uint32_t numIdentical) {
    size_t padding = 0;
    if (addPadding) {
        padding = (0x4 - (compressedSize & 0x3)) & 0x3;
    }
    for (size_t i = 0; i < padding; ++i) {
        if (fputc(0xFF, fp) == EOF) {
            if (fp == stdout) fprintf(stderr, "Failed to write padding to stdout\n");
            else fprintf(stderr, "Failed to write padding to '%s'\n", fileName);
            return false;
        }
    }

    compressedSize += padding + 8;
    uint32_t readOffset = padding + 8;
    uint32_t writeOffset = originalSize - compressedSize - start;
    compressedSize -= numIdentical;

    // Serialize through byte buffers so the footer is little-endian no matter
    // what the host byte order is.
    uint8_t sizeBytes[3];
    WRITE24(sizeBytes, compressedSize);
    if (fwrite(sizeBytes, sizeof sizeBytes, 1, fp) != 1) {
        if (fp == stdout) fprintf(stderr, "Failed to write compressed size to stdout\n");
        else fprintf(stderr, "Failed to write compressed size to '%s'\n", fileName);
        return false;
    }

    uint8_t readOffsetByte = (uint8_t)(readOffset & 0xFF);
    if (fwrite(&readOffsetByte, 1, 1, fp) != 1) {
        if (fp == stdout) fprintf(stderr, "Failed to write read offset to stdout\n");
        else fprintf(stderr, "Failed to write read offset to '%s'\n", fileName);
        return false;
    }

    uint8_t writeOffsetBytes[4];
    WRITE32(writeOffsetBytes, writeOffset);
    if (fwrite(writeOffsetBytes, sizeof writeOffsetBytes, 1, fp) != 1) {
        if (fp == stdout) fprintf(stderr, "Failed to write write offset to stdout\n");
        else fprintf(stderr, "Failed to write write offset to '%s'\n", fileName);
        return false;
    }

    return true;
}

// Prints the command-line help to stdout. `program` is the name shown in the
// usage line.
void PrintUsage(const char *program) {
    printf(
        "compress " VERSION "\n"
        "\n"
        "Usage: %s [-o OUTFILE] [-s START] [-p] -i INFILE\n"
        " -o OUTFILE\tWrites result to file instead of stdout\n"
        " -s START \tOffset to start compressing from\n"
        " -p \tPad compressed data to 4-alignment\n"
        " -i INFILE \tInput file to compress\n",
        program
    );
}

// Entry point: parses the options, copies the first START bytes of the input
// unchanged, compresses the remainder and appends the footer.
// Returns 0 on success and 1 on any error.
int main(int argc, const char **argv) {
    const char *program = argv[0];
    if (argc == 1) {
        PrintUsage(program);
        return 0;
    }

    const char *outFile = NULL;
    const char *inFile = NULL;
    int start = 0;
    bool padCompressedBlock = false;

    for (int i = 1; i < argc; ++i) {
        if (strcmp(argv[i], "-o") == 0) {
            if (++i >= argc) {
                fprintf(stderr, "Expected filename after -o\n");
                return 1;
            }
            outFile = argv[i];
        } else if (strcmp(argv[i], "-s") == 0) {
            if (++i >= argc) {
                fprintf(stderr, "Expected start offset after -s\n");
                return 1;
            }
            if (sscanf(argv[i], "%i", &start) != 1) {
                fprintf(stderr, "Invalid start offset '%s' could not be parsed\n", argv[i]);
                return 1;
            }
        } else if (strcmp(argv[i], "-p") == 0) {
            padCompressedBlock = true;
        } else if (strcmp(argv[i], "-i") == 0) {
            if (++i >= argc) {
                fprintf(stderr, "Expected filename after -i\n");
                return 1;
            }
            inFile = argv[i];
        } else {
            fprintf(stderr, "Unknown option '%s'\n", argv[i]);
            return 1;
        }
    }

    if (inFile == NULL) {
        PrintUsage(program);
        fprintf(stderr, "Please provide an input file to compress, see usage above.\n");
        return 1;
    }

    // Everything that needs releasing is declared before the first `goto` so
    // the shared cleanup path always sees initialized values.
    int exitCode = 1;
    FILE *fpIn = NULL;
    FILE *fpOut = NULL;
    uint8_t *buf = NULL;
    uint8_t *outBuf = NULL;
    uint8_t *result = NULL;
    size_t resultLen = 0;
    size_t numIdentical = 0;
    size_t inSize = 0;
    size_t payloadSize = 0;
    size_t headroom = 0;
    long fileSize = -1;

    fpIn = fopen(inFile, "rb");
    if (fpIn == NULL) {
        fprintf(stderr, "Failed to open input file '%s'\n", inFile);
        goto cleanup;
    }

    if (fseek(fpIn, 0, SEEK_END) == 0) {
        fileSize = ftell(fpIn);
    }
    if (fileSize < 0 || fseek(fpIn, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Failed to determine size of input file '%s'\n", inFile);
        goto cleanup;
    }
    inSize = (size_t)fileSize;

    buf = malloc(inSize);
    if (buf == NULL) {
        fprintf(stderr, "Failed to allocate input buffer for '%s'\n", inFile);
        goto cleanup;
    }
    if (fread(buf, inSize, 1, fpIn) != 1) {
        fprintf(stderr, "Failed to read input file '%s'\n", inFile);
        goto cleanup;
    }
    fclose(fpIn);
    fpIn = NULL;

    // Reject offsets that would make `inSize - start` wrap around or leave
    // nothing to compress. Checked before the output file is created.
    if (start < 0 || (size_t)start >= inSize) {
        fprintf(stderr, "Start offset %d is out of range for input file '%s' (%zu bytes)\n", start, inFile, inSize);
        goto cleanup;
    }
    payloadSize = inSize - (size_t)start;

    // Compress fills its buffer from the end and may run past the front when
    // the data does not compress: at most one flags byte per eight literals,
    // plus the initially reserved flags slot.
    headroom = payloadSize / 8 + 2;
    outBuf = malloc(headroom + payloadSize);
    if (outBuf == NULL) {
        fprintf(stderr, "Failed to allocate output buffer for '%s'\n", inFile);
        goto cleanup;
    }

    fpOut = stdout;
    if (outFile != NULL) {
        fpOut = fopen(outFile, "wb");
        if (fpOut == NULL) {
            fprintf(stderr, "Failed to open output file '%s'\n", outFile);
            goto cleanup;
        }
    }

    if (start > 0 && fwrite(buf, (size_t)start, 1, fpOut) != 1) {
        if (fpOut == stdout) fprintf(stderr, "Failed to write uncompressed header to stdout\n");
        else fprintf(stderr, "Failed to write uncompressed header to '%s'\n", outFile);
        goto cleanup;
    }

    if (!Compress(buf + start, outBuf + headroom, payloadSize, &result, &resultLen, &numIdentical)) {
        fprintf(stderr, "Failed to compress '%s'\n", inFile);
        goto cleanup;
    }

    // The payload goes to the output stream (the input stream is already closed).
    if (fwrite(result, resultLen, 1, fpOut) != 1) {
        if (fpOut == stdout) fprintf(stderr, "Failed to write compressed output to stdout\n");
        else fprintf(stderr, "Failed to write compressed output to '%s'\n", outFile);
        goto cleanup;
    }

    if (!WriteFooter(fpOut, outFile, padCompressedBlock, resultLen, inSize, start, numIdentical)) {
        goto cleanup;
    }

    exitCode = 0;

cleanup:
    free(outBuf);
    free(buf);
    if (fpIn != NULL) {
        fclose(fpIn);
    }
    if (fpOut != NULL && fpOut != stdout) {
        // fclose flushes buffered data, so a failure here means the output
        // file is incomplete.
        if (fclose(fpOut) != 0 && exitCode == 0) {
            fprintf(stderr, "Failed to finish writing output file '%s'\n", outFile);
            exitCode = 1;
        }
    }
    return exitCode;
}